def get_contributors(project):
    """ get the list of contributors and the tasks they worked on """

    # filter on tasks with state DONE
    done_filter = and_(
        TaskState.project_id == project.id,
        TaskState.state == TaskState.state_done
    )

    tasks = DBSession.query(TaskState.task_id, User.username) \
                     .join(TaskState.user) \
                     .filter(done_filter) \
                     .order_by(TaskState.user_id) \
                     .all()

    contributors = {}
    for user, tasks in itertools.groupby(tasks, key=lambda t: t.username):
        if user not in contributors:
            contributors[user] = {}
        contributors[user]['done'] = [task[0] for task in tasks]

    assigned = DBSession.query(Task.id, User.username) \
        .join(Task.assigned_to) \
        .filter(
            Task.project_id == project.id,
            Task.assigned_to_id != None  # noqa
        ) \
        .order_by(Task.assigned_to_id)
    for user, tasks in itertools.groupby(assigned,
                                         key=lambda t: t.username):
        if user not in contributors:
            contributors[user] = {}
        contributors[user]['assigned'] = [task[0] for task in tasks]

    return contributors
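A standalone sketch of the sort-then-group pattern this example relies on: itertools.groupby only merges adjacent rows, which is why the query above is ordered before grouping. The rows below are made-up stand-ins for the (task_id, username) results.

import itertools

rows = [(1, "alice"), (4, "alice"), (2, "bob")]  # hypothetical query results
contributors = {}
for user, user_rows in itertools.groupby(rows, key=lambda r: r[1]):
    contributors.setdefault(user, {})['done'] = [task_id for task_id, _ in user_rows]
print(contributors)  # {'alice': {'done': [1, 4]}, 'bob': {'done': [2]}}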
Example #2
 def _groupby_x_axis_and_vqs(self):
     """Returns a list of list of lists where each list has the term and 
     option dict with the same xAxis and within each list with same xAxis,
     all items in same sub-list have items with same ValueQuerySet.
     
     Here is an example of what this function would return. ::
     
     [
      [[(term-1-A-1, opts-1-A-1), (term-1-A-2, opts-1-A-2), ...],
       [(term-1-B-1, opts-1-B-1), (term-1-B-2, opts-1-B-2), ...],
       ...],
      [[term-2-A-1, opts-2-A-1), (term-2-A-2, opts-2-A-2), ...],
       [term-2-B-2, opts-2-B-2), (term-2-B-2, opts-2-B-2), ...],
       ...],
       ...
       ]
       
     In the above example,
     
     - term-1-*-* all have same xAxis.
     - term-*-A-* all are from same ValueQuerySet (table)
     """
     dss = self.datasource.series
     x_axis_vqs_groups = defaultdict(dict)
     sort_fn = lambda tk_td4: tk_td4[1].get('xAxis', 0)
     so = sorted(list(self.series_options.items()), key=sort_fn)
     x_axis_groups = groupby(so, sort_fn)
     for (x_axis, itr1) in x_axis_groups:
         sort_fn = lambda tk_td: dss[tk_td[1]['_x_axis_term']]['_data']
         itr1 = sorted(itr1, key=sort_fn)
         for _vqs_num, (_data, itr2) in enumerate(groupby(itr1, sort_fn)):
             x_axis_vqs_groups[x_axis][_vqs_num] = _x_vqs = {}
             for tk, td in itr2:
                 _x_vqs.setdefault(td['_x_axis_term'], []).append(tk)
     return x_axis_vqs_groups
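A minimal, self-contained sketch of the same sort-then-nested-groupby idea on plain dicts; the 'xAxis' and 'vqs' keys and the toy terms below are invented for illustration, not the library's real option structure.

from itertools import groupby

series_options = [
    ("t1", {"xAxis": 0, "vqs": "qs_a"}),
    ("t2", {"xAxis": 0, "vqs": "qs_b"}),
    ("t3", {"xAxis": 1, "vqs": "qs_a"}),
]
x_key = lambda term_opts: term_opts[1]["xAxis"]
vqs_key = lambda term_opts: term_opts[1]["vqs"]
by_x_axis = {}
for x_axis, items in groupby(sorted(series_options, key=x_key), key=x_key):
    items = sorted(items, key=vqs_key)
    by_x_axis[x_axis] = {vqs: [t for t, _ in grp]
                         for vqs, grp in groupby(items, key=vqs_key)}
print(by_x_axis)  # {0: {'qs_a': ['t1'], 'qs_b': ['t2']}, 1: {'qs_a': ['t3']}}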
Example #3
def finalScores(gene, firstGroup, secondGroup, patientCountFirst, patientCountSecond):
    # these two blocks count multiple mutations in the same gene as a single mutation

    firstGroupFiltered = []
    secondGroupFiltered = []

    for variantList in firstGroup:
        variantsInOneFile = []
        for variant in variantList:
            if variant[1] == gene:
                variantsInOneFile.append((variant[0], variant[-1]*variant[-2]*variant[-3]))
        firstGroupFiltered.append(variantsInOneFile)

    for variantList in secondGroup:
        variantsInOneFile = []
        for variant in variantList:
            if variant[1] == gene:
                variantsInOneFile.append((variant[0], variant[-1]*variant[-2]*variant[-3]))
        secondGroupFiltered.append(variantsInOneFile)

    firstCounter = 0
    for variantList in firstGroupFiltered:
        for key, group in itertools.groupby(variantList, lambda x: x[0]):
            firstCounter += max([i[1] for i in group])

    secondCounter = 0
    for variantList in secondGroupFiltered:
        for key, group in itertools.groupby(variantList, lambda x: x[0]):
            secondCounter += max([i[1] for i in group])

    return (firstCounter/patientCountFirst - secondCounter/patientCountSecond)
Example #4
def get_eligible_certificates(exclude=None):
    """
    Finds all certificates that are eligible for certificate expiration.
    :param exclude:
    :return:
    """
    certificates = defaultdict(dict)
    certs = get_certificates(exclude=exclude)

    # group by owner
    for owner, items in groupby(certs, lambda x: x.owner):
        notification_groups = []

        for certificate in items:
            notifications = needs_notification(certificate)

            if notifications:
                for notification in notifications:
                    notification_groups.append((notification, certificate))

        # group by notification
        for notification, items in groupby(notification_groups, lambda x: x[0].label):
            certificates[owner][notification] = list(items)

    return certificates
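The owner-then-label nesting above depends on the input already being sorted by owner; a standalone sketch of that shape with toy (owner, label) tuples rather than real certificate objects.

from collections import defaultdict
from itertools import groupby

certs = sorted([("alice", "30d"), ("bob", "7d"), ("alice", "7d")])
grouped = defaultdict(dict)
for owner, items in groupby(certs, key=lambda c: c[0]):
    for label, pairs in groupby(sorted(items, key=lambda c: c[1]), key=lambda c: c[1]):
        grouped[owner][label] = list(pairs)
print(dict(grouped))
# {'alice': {'30d': [('alice', '30d')], '7d': [('alice', '7d')]}, 'bob': {'7d': [('bob', '7d')]}}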
Example #5
def filterRepeatPair(matchPair):
    newMatchPair = []
    # Format : [[3, 5, 2486, 2532, 2486, 'Read48_d'] ]
    inNoList = []
    outNoList = []
    
    matchPair.sort(key=itemgetter(0))
    for key, items in groupby(matchPair, itemgetter(0)):
        anotherSideList = [eachitem[1] for eachitem in items]
        if len(set(anotherSideList)) > 1:
            inNoList.append(key)

    matchPair.sort(key=itemgetter(1))
    for key, items in groupby(matchPair, itemgetter(1)):
        anotherSideList = [eachitem[0] for eachitem in items]
        if len(set(anotherSideList)) > 1:
            outNoList.append(key)
    
    for eachitem in matchPair:
        if not eachitem[0] in inNoList and not eachitem[1] in outNoList:
            newMatchPair.append(eachitem)
    
    return newMatchPair
Example #6
    def personal_view(self, request, week):
        items = m.DishOrderDayItem.objects\
            .filter(order__week=week, count__gt=0)\
            .select_related(depth=4)\
            .order_by(
                'order__user__profile__office__id', 
                'order__user__first_name', 
                'order__user__pk', 
                'dish_day__day__pk',
                'dish_day__dish__pk',
            )

        offices = []

        items = list(items)
        self.fix_profiles([i.order for i in items])

        for office, usersseq in groupby(list(items), lambda i: i.order.user.profile.office):
            users = []
            for user, seq in groupby(usersseq, lambda i: i.order.user):
                seq = list(seq)
                users.append((
                    user,
                    group_by_materialize(groupby(seq, lambda i: i.dish_day.day)),
                    ))

            offices.append(( office, users ))

        return direct_to_template(request, 'dinner/report_personal.html', {
            'week': week,
            'offices': offices,
            })
Example #7
def GetGoAnnotation(seqids):

    db = pymysql.connect(host = "mysql-amigo.ebi.ac.uk",
                     user = "******",
                     passwd = "amigo",
                     db = "go_latest",
                     port = 4085)

    cur = db.cursor()
    cur.execute(
          """
      SELECT
        term.name,
        term.acc,
        term.term_type
       FROM   gene_product
        INNER JOIN dbxref ON (gene_product.dbxref_id=dbxref.id)
        INNER JOIN species ON (gene_product.species_id=species.id)
        INNER JOIN association ON (gene_product.id=association.gene_product_id)
        INNER JOIN evidence ON (association.id=evidence.association_id)
        INNER JOIN term ON (association.term_id=term.id)
       WHERE
         dbxref.xref_key = %s;
         """, seqids)
    List = list()
    GO = list()
    f= cur.fetchall()
    for i in f:
        List.append(i[0] + ":" + i[2])
        GO.append(i[1])
    List = list(map(itemgetter(0), groupby(List)))
    GO = list(map(itemgetter(0), groupby(GO)))
    db.close()
    return[seqids,List,GO]
Example #8
def mapper():
    for user,data in itertools.groupby(get_data(sys.stdin,timecol = 1),operator.itemgetter(0)):
        '''for each user'''
#        print '--------',user
        for day,group in itertools.groupby(data,lambda k:datetime.fromtimestamp(long(k[1])).strftime('%Y%m%d')):
#            print day
            process_line(user,group)
Example #9
 def record(self, records, otsv, rtsv, blocking_validation):
   def records_by_chunk(batch_size, records):
     offset = 0
     while len(records[offset:]) > 0:
       yield records[offset:offset+batch_size]
       offset += batch_size
   def get_data_collection(label, action):
     if label in self.preloaded_data_collections:
       return self.preloaded_data_collections[label]
     else:
       dc_conf = {'label' : label, 'action': action}
       return self.kb.factory.create(self.kb.DataCollection, dc_conf)
   if len(records) == 0:
     msg = 'No records are going to be imported'
     self.logger.critical(msg)
     raise core.ImporterValidationError(msg)
   study = self.find_study(records)
   self.data_sample_klass = self.find_data_sample_klass(records)
   self.preload_data_samples()
   self.preload_data_collections()
   asetup = self.get_action_setup('importer.data_collection-%f' % time.time(),
                                  json.dumps(self.action_setup_conf))
   device = self.get_device('importer-%s.data_collection' % version,
                            'CRS4', 'IMPORT', version)
   conf = {
     'setup': asetup,
     'device': device,
     'actionCategory': self.kb.ActionCategory.PROCESSING,
     'operator': self.operator,
     'context': study,
     }
   action = self.kb.factory.create(self.kb.Action, conf).save()
   def keyfunc(r): return r['label']
   sub_records = []
   data_collections = {}
   records = sorted(records, key=keyfunc)
   for k, g in it.groupby(records, keyfunc):
     data_collections[k] = get_data_collection(k, action)
     good_records, bad_records = self.do_consistency_checks(data_collections[k], list(g))
     sub_records.append(good_records)
     for br in bad_records:
       rtsv.writerow(br)
     if blocking_validation and len(bad_records) >= 1:
       self.kb.delete(action)
       raise core.ImporterValidationError('%d invalid records' % len(bad_records))
   records = sum(sub_records, [])
   if len(records) == 0:
     self.kb.delete(action)
     msg = 'No records are going to be imported'
     self.logger.warning(msg)
     sys.exit(0)
   records = sorted(records, key=keyfunc)
   for k, g in it.groupby(records, keyfunc):
     dc = data_collections[k]
     if not dc.is_mapped():
       dc.save()
     for i, c in enumerate(records_by_chunk(self.batch_size, list(g))):
       self.logger.info('start processing chunk %s-%d' % (k, i))
       self.process_chunk(otsv, study, dc, c)
       self.logger.info('done processing chunk %s-%d' % (k,i))
Example #10
    def dispatch_request(self):
        template = "university/index.html"
        if request.headers.get("X-Pjax", None):
            template = "university/_charts.html"

        marks = (
            Mark.query.join(Student, Group)
            .filter(Group.year == Group.current_year())
            .with_entities(Mark.value, Group.id.label("group_id"), Student.id.label("student_id"), Student.sex)
            .order_by("group_id", "value", "sex")
        )

        marks = {
            group_id: {
                "marks": {
                    v: {
                        "marks_count": len(marks),
                        "marks_summ": sum(i.value if i.value <= Mark.MARK_INCREDIBLE else 1 for i in marks),
                    }
                    for v, marks in {v: list(marks) for v, marks in groupby(items, lambda x: x.value)}.items()
                    if Mark.MARK_ABSENT <= v <= Mark.MARK_INCREDIBLE or v == Mark.MARK_SHINING
                },
                "marks_count": len(items),
                "marks_count_positive": len(list(i for i in items if Mark.MARK_BASE < i.value <= Mark.MARK_INCREDIBLE)),
                "marks_summ": sum(i.value for i in items),
                "marks_summ_positive": sum(i.value for i in items if Mark.MARK_BASE < i.value <= Mark.MARK_INCREDIBLE),
            }
            for group_id, items in {id: list(items) for id, items in groupby(marks, lambda x: x.group_id)}.items()
        }

        for group_id, info in marks.items():
            info["max_summ"] = max(i["marks_summ"] for i in info["marks"].values())

        return render_template(template, **{"marks": marks})
Example #11
def resultAbs(x6):
	max = sorted(x6,key=operator.itemgetter(1))
	max = [list(group) for key,group in itertools.groupby(max,operator.itemgetter(1))][-1][0]
	min = sorted(x6,key=operator.itemgetter(1))
	min = [list(group) for key,group in itertools.groupby(min,operator.itemgetter(1))][0][0]
	print(max[0],"%.2f" % max[1],sep=",")
	print(min[0],"%.2f" % min[1],sep=",")
Example #12
def fasta_iter(fasta_name):
    '''
    Given a FASTA file, yield tuples of (header, sequence).
    Modified from Brent Pedersen,
    "Correct Way To Parse A Fasta File In Python":
    https://www.biostars.org/p/710/
    '''
    if((fasta_name[-3:] == '.gz') or 
       (fasta_name[-5:] == '.gzip')):
        with gzip.open(fasta_name, 'rb') as f:
            data = (x[1] for x in groupby(f, lambda line: line.decode('utf-8')[0] == ">"))
            for header in data:
                header = header.__next__().decode('utf-8')[1:].strip()
                seq = "".join(s.decode('utf-8').strip() for s in data.__next__())
                yield(header, seq)
    else:
        with open(fasta_name) as f:
            # ditch the boolean (x[0]) and just keep the header or sequence since
            # we know they alternate.
            data = (x[1] for x in groupby(f, lambda line: line[0] == ">"))
            for header in data:
                # drop the ">"
                header = header.__next__()[1:].strip()
                # join all sequence lines to one.
                seq = "".join(s.strip() for s in data.__next__())
                yield(header, seq)
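The header/sequence alternation trick is easier to see on in-memory lines; a minimal sketch with made-up records (no file needed).

from itertools import groupby

lines = [">seq1", "ACGT", "TTGA", ">seq2", "GGCC"]
chunks = (list(g) for _, g in groupby(lines, lambda line: line.startswith(">")))
for header_chunk in chunks:
    header = header_chunk[0][1:]   # drop the ">"
    seq = "".join(next(chunks))    # the next group holds the sequence lines
    print(header, seq)             # seq1 ACGTTTGA, then seq2 GGCC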
Example #13
def get_exclusions(request, naics_code, link_page=None, all_langs=False):
	with request.connmgr.get_connection() as conn:
		cursor = conn.execute('EXEC dbo.sp_NAICS_Exclusion_l ?,?', str(naics_code), all_langs)
		exclusions = cursor.fetchall()
		
		cursor.nextset()

		uses = cursor.fetchall()

		cursor.close()

	
	uses = dict((k, list(v)) for k,v in groupby(uses, attrgetter('Exclusion_ID')))

	output = []
	for establishment, exclusions in groupby(exclusions, attrgetter('Establishment')):
		if establishment:
			output.extend([Markup('<p>'), _('Establishments primarily engaged in:', request), Markup('</p>')])

		output.append(Markup('<ul>'))
		for exclusion in exclusions:
			use_instead = "; ".join(link_code(request, x.Code, x.Code, link_page) + ' ' + escape(x.Classification) for x in (uses.get(exclusion.Exclusion_ID) or []))
			if use_instead:
				use_instead = use_instead.join([" (", ")"])

			output.extend([Markup('<li>'), escape(exclusion.Description), use_instead,Markup('</li>')])
			

		output.append(Markup('</ul>'))

	return Markup(''.join(output))
Example #14
def main():
    for fingerprint, fgroup in groupby(read_input(sys.stdin), itemgetter(1)):
        names = []
        for name, ngroup in groupby(fgroup, itemgetter(2)):            
            names.append(name.strip())
        if len(names) > 1:    
            print '%s\t%s' % (fingerprint.strip(), names)  
Example #15
 def do_controller_specific_work(self):
     if self.user is None:
         raise Authorization_Exception("You must be logged in to list your targets.")
     targets = self.user.targets
     events = sorted(map(lambda target: target.event, targets), key = lambda event: event.section_id)
     targets_by_event_id = {}
     for target in targets:
         targets_by_event_id[target.event_id] = target.target_id
     events_by_section_id = dict((k, list(g)) for k, g in groupby(events, lambda event: event.section_id))
     db_session = DB_Session_Factory.get_db_session()
     sections = db_session.query(Section).filter(Section.section_id.in_(events_by_section_id.keys())).order_by(Section.course_id).all()
     sections_by_course_id = dict((k, list(g)) for k, g in groupby(sections, lambda section: section.course_id))
     courses = db_session.query(Course).filter(Course.course_id.in_(sections_by_course_id.keys())).all()
     response = []
     for course in courses:
         course_dict = course.for_api()
         sections_list = []
         for section in sections_by_course_id.get(course.course_id):
             section_dict = section.for_api()
             events = []
             for event_in_this_section in events_by_section_id[section.section_id]:
                 event_dict = event_in_this_section.for_api()
                 target_id_number = targets_by_event_id.get(event_in_this_section.event_id, None)
                 if target_id_number is not None:
                      event_dict['target_id'] = str(target_id_number)
                 events.append(event_dict)
             section_dict['events'] = events
             sections_list.append(section_dict)
         course_dict['course_sections'] = sections_list
         response.append(course_dict)
     return HTTP_Response('200 OK', {'targets' : response})
Example #16
def grouped_totals(entries):
    select = {"day": {"date": """DATE_TRUNC('day', end_time)"""},
              "week": {"date": """DATE_TRUNC('week', end_time)"""}}
    weekly = entries.extra(select=select["week"]).values('date', 'billable')
    weekly = weekly.annotate(hours=Sum('hours')).order_by('date')
    daily = entries.extra(select=select["day"]).values('date', 'project__name',
                                                       'billable')
    daily = daily.annotate(hours=Sum('hours')).order_by('date',
                                                        'project__name')
    weeks = {}
    for week, week_entries in groupby(weekly, lambda x: x['date']):
        try:
            if timezone.is_naive(week):
                week = timezone.make_aware(week,
                    timezone.get_current_timezone())
        except AttributeError:
            week = datetime.datetime.combine(week,
                timezone.get_current_timezone())
        weeks[week] = get_hours(week_entries)
    days = []
    last_week = None
    for day, day_entries in groupby(daily, lambda x: x['date']):
        week = get_week_start(day)
        if last_week and week > last_week:
            yield last_week, weeks.get(last_week, {}), days
            days = []
        days.append((day, daily_summary(day_entries)))
        last_week = week
    yield week, weeks.get(week, {}), days
Example #17
def report(request):

    variation_list = ProductVariation.objects.order_by(
        "product__vendor", "product__position", "product__title"
    ).select_related("product__vendor")

    total_price = 0
    total_count = 0
    report = []
    for vendor, vrs in groupby(variation_list, lambda x: x.product.vendor):
        vendor_vars = list(vrs)
        prod_details = []
        vendor_price = 0
        vendor_count = 0
        for prod, vrs in groupby(vendor_vars, lambda x: x.product):
            cur_vars = list(vrs)
            stock_price = 0
            stock_count = 0
            for vr in cur_vars:
                if (vr.num_in_stock > 0) and (vr.unit_price):
                    stock_price += vr.unit_price * vr.num_in_stock
                    stock_count += vr.num_in_stock
            vrs_sum = (stock_count, stock_price)
            vendor_price += stock_price
            vendor_count += stock_count
            prod_details.append({"product": prod, "variations": cur_vars, "summary": vrs_sum})
        vendor_sum = (vendor_count, vendor_price)
        total_price += vendor_price
        total_count += vendor_count
        report.append({"vendor": vendor, "prod_details": prod_details, "summary": vendor_sum})
    total_sum = (total_count, total_price)

    context = {"report": report, "variation_list": variation_list, "total_sum": total_sum}
    return direct_to_template(request, "admin/shop/report.html", context)
Example #18
def collapse_cell_addresses(cells, input_ranges=()):
    """ Collapse a collection of cell co-ordinates down into an optimal
        range or collection of ranges.

        E.g. Cells A1, A2, A3, B1, B2 and B3 should have the data-validation
        object applied, attempt to collapse down to a single range, A1:B3.

        Currently only collapsing contiguous vertical ranges (i.e. above
        example results in A1:A3 B1:B3).  More work to come.
    """
    keyfunc = lambda x: x[0]

    # Get the raw coordinates for each cell given
    raw_coords = [coordinate_from_string(cell) for cell in cells]

    # Group up as {column: [list of rows]}
    grouped_coords = OrderedDict((k, [c[1] for c in g]) for k, g in
                          groupby(sorted(raw_coords, key=keyfunc), keyfunc))
    ranges = list(input_ranges)

    # For each column, find contiguous ranges of rows
    for column in grouped_coords:
        rows = sorted(grouped_coords[column])
        grouped_rows = [[r[1] for r in list(g)] for k, g in
                        groupby(enumerate(rows),
                        lambda x: x[0] - x[1])]
        for rows in grouped_rows:
            if len(rows) == 0:
                pass
            elif len(rows) == 1:
                ranges.append("%s%d" % (column, rows[0]))
            else:
                ranges.append("%s%d:%s%d" % (column, rows[0], column, rows[-1]))

    return " ".join(ranges)
Example #19
    def get_context_data(self, **kwargs):
        context = super().get_context_data(**kwargs)

        queryset = Account.objects.filter(owner_id=self.request.user.pk)
        queryset = queryset.aggregate(total=Sum('amount'))
        amount = total_amount = queryset['total']

        transactions = Transaction.objects.filter(
            account__owner_id=self.request.user.pk).order_by('-date')

        by_weeks_keys = []
        by_weeks_values = []
        for k, v in groupby(transactions, _week_keyfunc):
            by_weeks_keys.append(k.split('-')[1])
            by_weeks_values.append(str(amount))
            amount -= sum(x.amount for x in v)

        by_months_keys = []
        by_months_values = []
        for k, v in groupby(transactions, lambda x: format(x.date, "%Y-%m")):
            by_months_keys.append(k.split('-')[1])
            by_months_values.append(str(sum(x.amount for x in v)))

        graph_weekly = {
            'keys': '[{}]'.format(','.join(by_weeks_keys[::-1])),
            'values': '[{}]'.format(','.join(by_weeks_values[::-1])),
        }

        graph_monthly = {
            'keys': '[{}]'.format(','.join(by_months_keys[::-1])),
            'values': '[{}]'.format(','.join(by_months_values[::-1])),
        }
        context['graph_weekly'] = graph_weekly
        context['graph_monthly'] = graph_monthly
        return context
Example #20
def blastall_v_regions(myFastq1,myFastq2,myRef,outputfile,eVal,blastallDir):
    fns={}
    chunk=10**4
    with open(myFastq1, 'r') as datafile1:
        groups = groupby(datafile1, key=lambda k, line=count(): next(line) // chunk)
        for k, group in groups:
            with tempfile.NamedTemporaryFile(delete=False,
                           dir=tempfile.mkdtemp(),prefix='{}_'.format(str(k))) as outfile:
                outfile.write(''.join(group))
                fns[k]=outfile.name   
            blastn_cline = blastallDir+"blastall -p blastn -o "+str(outfile.name)+".blast.out -i "+str(outfile.name)+" -d "+myRef+" -e "+str(eVal)+" -m 8 -b 1"    
            os.system(blastn_cline+" > /dev/null 2>&1")
            os.system("cat "+str(outfile.name)+".blast.out >> "+outputfile)
            os.remove(str(outfile.name)+".blast.out")
            os.remove(str(outfile.name))
            testvar=commands.getstatusoutput("dirname "+str(outfile.name))
            os.system("rm -r "+testvar[1])
    fns={}
    with open(myFastq2, 'r') as datafile2:
        groups = groupby(datafile2, key=lambda k, line=count(): next(line) // chunk)
        for k, group in groups:
            with tempfile.NamedTemporaryFile(delete=False,
                           dir=tempfile.mkdtemp(),prefix='{}_'.format(str(k))) as outfile:
                outfile.write(''.join(group))
                fns[k]=outfile.name   
            blastn_cline = blastallDir+"blastall -p blastn -o "+str(outfile.name)+".blast.out -i "+str(outfile.name)+" -d "+myRef+" -e "+str(eVal)+" -m 8 -b 1"    
            os.system(blastn_cline+" > /dev/null 2>&1")
            os.system("cat "+str(outfile.name)+".blast.out >> "+outputfile)
            os.remove(str(outfile.name)+".blast.out")
            os.remove(str(outfile.name))
            testvar=commands.getstatusoutput("dirname "+str(outfile.name))
            os.system("rm -r "+testvar[1])
Example #21
    def invoice(cls, commissions):
        pool = Pool()
        Invoice = pool.get('account.invoice')

        key = lambda c: c._group_to_invoice_key()
        commissions.sort(key=key)
        invoices = []
        to_write = []
        for key, commissions in groupby(commissions, key=key):
            commissions = list(commissions)
            key = dict(key)
            invoice = cls._get_invoice(key)
            invoice.save()
            invoices.append(invoice)

            key = lambda c: c._group_to_invoice_line_key()
            commissions.sort(key=key)
            for key, commissions in groupby(commissions, key=key):
                commissions = [c for c in commissions if not c.invoice_line]
                key = dict(key)
                invoice_line = cls._get_invoice_line(key, invoice, commissions)
                invoice_line.save()
                to_write.extend([commissions, {
                            'invoice_line': invoice_line.id,
                            }])
        if to_write:
            cls.write(*to_write)
        Invoice.update_taxes(invoices)
Example #22
def count_concepts(concepts):
    # count concepts by location and time
    counts = []
    if settings.DATE_GROUP_TYPE == 'year':
        entries = [(concept[0], c[0], c[1].split('-')[0]) 
            for concept in concepts 
            for c in concept[-1]]
    elif settings.DATE_GROUP_TYPE == 'month':
        entries = [(concept[0], c[0], c[1].rsplit('-', 1)[0]) 
            for concept in concepts 
            for c in concept[-1]]

    # group by location
    entries.sort(key=lambda x: x[1])
    for loc, group in itertools.groupby(entries, lambda x: x[1]):
        concepts = list([(g[0], g[2]) for g in group])   
        # group by concepts
        concepts.sort(key=lambda x: x[0])
        concept_counts = []
        for concept, group2 in itertools.groupby(concepts, lambda x: x[0]):
            # Group by dates
            dates = [g[1] for g in group2]
            dates.sort()
            date_counts = []
            for date, group3 in itertools.groupby(dates):
                date_counts.append((date, len(list(group3))))
            date_counts.sort(key=lambda x: x[0])    
            concept_counts.append((concept, date_counts))
        counts.append((loc, dict(concept_counts)))
    return dict(counts)
Example #23
    def get_events(self, course_set=False):
        # Create a dictionary of months in the semester that contains
        #  defaultdicts of lists
        start = datetime.datetime.combine(self.start, datetime.time(0, 0))
        end = datetime.datetime.combine(self.end, datetime.time(0, 0))

        occurrences = []

        if not course_set:
            course_set = self.course_set.all()

        # Gather all the occurrences
        for course in course_set:
            for event in course.schedule.all():
                occurrences.append(
                    [
                        (single_occurence, event)
                        for single_occurence in event.recurrences.occurrences(dtstart=start, dtend=end)
                    ]
                )

        months = dict([(month, list(events)) for month, events in groupby(chain(*occurrences), lambda a: a[0].month)])

        for month, e in months.items():
            months[month] = dict([(day, list(events)) for day, events in groupby(e, lambda a: a[0].day)])

        return months
Example #24
def read_groups(aligns,platform):
  platform = platform.lower()

  if platform=='454':
    get_read_start = read_start_454
  elif platform in ('','neutral'):
    get_read_start = read_start_generic
  else:
    raise ValueError('Unknown platform specified: %s' % platform)

  for tid,contig_aligns in groupby(aligns,attrgetter('tid')):
    if tid==-1:
      continue

    fwd = []
    rev = []

    for align in contig_aligns:
      read_start = get_read_start(align)
      if not align.is_reverse:
        fwd.append( (read_start,align) )
      else:
        rev.append( (read_start,align) )

    fwd.sort()
    rev.sort()

    for pos,group in groupby(fwd,itemgetter(0)):
      yield [ align for pos,align in group ]

    for pos,group in groupby(rev,itemgetter(0)):
      yield [ align for pos,align in group ]
Example #25
    def get_context_data(self, **kwargs):
        context = super(IllinoisPostListView, self).get_context_data(**kwargs)
        context['all_posts'] = []

        memberships = \
            MembershipExtra.objects.order_by('election__name')\
                                   .select_related('base')\
                                   .select_related('election')\
                                   .prefetch_related('base__on_behalf_of')\
                                   .prefetch_related('base__person')\
                                   .prefetch_related('base__post')\
                                   .prefetch_related('base__post__extra')

        mem_grouper = lambda x: x.election
        post_grouper = lambda x: x.base.post.extra.slug

        for election, memberships in itertools.groupby(memberships, key=mem_grouper):
            person_posts = OrderedDict()
            
            memberships = sorted(memberships, key=post_grouper)

            for post_slug, post_group in itertools.groupby(memberships, key=post_grouper):

                post_group = list(post_group)
                post = post_group[0].base.post

                person_posts[post] = []

                for membership in post_group:
                    person_posts[post].append([membership.base.person, 
                                               membership.base.on_behalf_of])
                
            context['all_posts'].append((election, person_posts))

        return context
Example #26
	def run(self, edit, within_lines=True):
		[*old_selections] = selections = self.view.sel()
		selections.clear()

		def selection_to_line(selection):
			return self.view.rowcol(selection.begin())[0]

		# Default, no grouping
		grouper = (lambda _: True)

		# Group by lines
		def line_grouper(selection):
			return self.view.rowcol(selection.begin())[0]


		if within_lines:
			# Is there no line with multiple selections?
			for category, group in groupby(old_selections, key=line_grouper):
				if sum(1 for _ in group) > 1:
					grouper = line_grouper


		for category, group in groupby(old_selections, key=grouper):
			first = next(group)
			for last in chain([first], group):
				pass

			selections.add(sublime.Region(first.begin(), last.end()))
Example #27
def report_bad_cycles(quality_csv, bad_cycles_csv, bad_tiles_csv=None):
    reader = csv.DictReader(quality_csv)
    writer = csv.DictWriter(bad_cycles_csv,
                            ['tile', 'cycle', 'errorrate'],
                            lineterminator=os.linesep)
    writer.writeheader()
    if bad_tiles_csv is None:
        tile_writer = None
    else:
        tile_writer = csv.DictWriter(bad_tiles_csv,
                                     ['tile', 'bad_cycles'],
                                     lineterminator=os.linesep)
        tile_writer.writeheader()
    for tile, tile_cycles in itertools.groupby(reader, itemgetter('tile')):
        bad_cycle_count = 0
        for _direction, cycles in itertools.groupby(tile_cycles,
                                                    direction_grouper):
            is_bad = False
            for cycle in cycles:
                errorrate = cycle['errorrate']
                is_bad = (is_bad or
                          errorrate is None or
                          errorrate == '' or
                          float(errorrate) >= BAD_ERROR_RATE)
                if is_bad:
                    writer.writerow(cycle)
                    bad_cycle_count += 1
        if tile_writer is not None:
            tile_writer.writerow(dict(tile=tile, bad_cycles=bad_cycle_count))
Example #28
def combined_releases(releaser):

	credits = releaser.credits().select_related('nick', 'production__default_screenshot')\
		.prefetch_related('production__author_nicks__releaser', 'production__author_affiliation_nicks__releaser', 'production__platforms', 'production__types')\
		.defer('production__notes', 'production__author_nicks__releaser__notes', 'production__author_affiliation_nicks__releaser__notes')\
		.order_by('-production__release_date_date', 'production__title', 'production__id', 'nick__name', 'nick__id')

	# reorganise credits queryset into a list of
	# (production, [ (nick, [credits_for_that_nick]) ]) records
	credits_by_production = groupby(credits, lambda credit: credit.production)
	# credits_by_production = list of (production, [credits]) records

	credits_by_production_nick = []
	for (production, credits) in credits_by_production:
		for (nick, credits) in groupby(credits, lambda credit: credit.nick):
			record = (production, nick, list(credits))
			credits_by_production_nick.append(record)

	production_ids = [production.id for production, nick, credits in credits_by_production_nick]

	productions = releaser.productions().select_related('default_screenshot')\
		.exclude(id__in=production_ids)\
		.prefetch_related('author_nicks__releaser', 'author_affiliation_nicks__releaser', 'platforms', 'types')\
		.defer('notes', 'author_nicks__releaser__notes', 'author_affiliation_nicks__releaser__notes')\
		.order_by('-release_date_date', '-title')

	credits_with_prods = credits_by_production_nick + [(prod, None, None) for prod in productions]
	credits_with_prods.sort(key=lambda item: (item[0].release_date_date is None, item[0].release_date_date), reverse=True)

	return {
		'releaser': releaser,
		'credits': credits_with_prods,
		'show_screenshots': True,
		'show_prod_types': True,
	}
Example #29
def generate_paragraph_report(report_config, submitted_to, submitted_by):
    reporting_equipments = []

    for equipment in report_config["equipments"]:
        # all this query does is it takes the list of paragraphs and puts them into dictionaries keyed
        # first by paragraph type, then by component num, then by category.
        paragraphs = dict(
            (paragraph_type, dict(
                (component_num, dict(
                    (category_name, sorted(category_paragraph_list, key=lambda x: x["sort_order"]))
                    for category_name, category_paragraph_list
                    in groupby(sorted(component_paragraph_list, key=lambda x: x["category_name"]), lambda x: x["category_name"])))
                for component_num, component_paragraph_list
                in groupby(sorted(type_paragraph_list, key=lambda x: x["component_num"]), lambda x: x["component_num"])))
            for paragraph_type, type_paragraph_list
            in groupby(sorted(equipment["paragraphs"], key=lambda x: x["type"]), lambda x: x["type"]))

        reporting_equipments.append({
            "equipment": equipment["equipment"],
            "paragraphs": paragraphs
        })

    report_options = {
        "equipments": reporting_equipments,
        "reportdate": str(datetime.now().strftime("%m/%d/%Y")),
        "submittedto": submitted_to,
        "submittedby": submitted_by,
        "reportname": "Syrx"
    }

    # this is a list of all of the static files necessary to generate the pdf
    package_subfile = "paragraph"
    file_paths_to_copy = [
        pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "cover.html")),
        pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "cover-populate.js")),
        pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "footer.html")),
        pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "footer-populate.js")),
        pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "header.html")),
        pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "header-populate.js")),
        pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "report.html")),
        pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "report-populate.js")),
        pkg_resources.resource_filename(report_templates_package, os.path.join(package_subfile, "template.css")),
        pkg_resources.resource_filename(report_templates_package, os.path.join("images", "PathianLogoSmall.png")),
        pkg_resources.resource_filename(report_templates_package, os.path.join("scripts", "jquery.min.js")),
        pkg_resources.resource_filename(report_templates_package, os.path.join("scripts", "wkhtmltopdf.substitutions.js"))
    ]

    temp_dir = create_temp_dir(report_options, file_paths_to_copy)
    pdfname = "paragraph_report-" + datetime.now().strftime("%Y-%m-%d %H %M %S") + ".pdf"
    pdfpath = os.path.join(temp_dir, pdfname)
    header_html_path = os.path.join(temp_dir, "header.html")
    footer_html_path = os.path.join(temp_dir, "footer.html")
    cover_html_path = os.path.join(temp_dir, "cover.html")
    report_html_path = os.path.join(temp_dir, "report.html")

    subprocess.call(['wkhtmltopdf', '-B', '2cm', '-L', '2cm', '-T', '2cm', '-R', '2cm',
                     '--header-html', header_html_path, '--footer-html', footer_html_path,
                     'cover', cover_html_path, report_html_path, pdfpath])

    return pdfpath
Example #30
def group_values(lines):
    records = []
    for _, records in itertools.groupby(lines, lambda row: row[0:2]):
        for __, by_value in itertools.groupby(records, lambda row: row[-3:]):
            recs = list(by_value)  # consume the iterator so we can grab positionally
            first = recs[0]

            record = Record()
            record.name = first[0]
            record.type = first[1]
            if first[2].startswith('ALIAS'):
                _, alias_hosted_zone_id, alias_dns_name = first[2].split(':')
                record.alias_hosted_zone_id = alias_hosted_zone_id
                record.alias_dns_name = alias_dns_name
            else:
                record.resource_records = [r[2] for r in recs]
                record.ttl = first[3]
            record.region = first[4] or None
            record.weight = first[5] or None
            record.identifier = first[6] or None
            record.failover = first[7] or None
            if first[8] == 'True':
                 record.alias_evaluate_target_health = True
            elif first[8] == 'False':
                 record.alias_evaluate_target_health = False
            else:
                record.alias_evaluate_target_health = None

            yield record
Example #31
            source = hit['_source']
            categories = categories + source['categories']

        categories = [
            cat for cat in categories if re.search('^[0-9]{4}s? ', cat) is None
            and re.search('^[0-9]{2}th-century ', cat) is None
            and re.search(' in [0-9]{4}s?', cat) is None and re.search(
                ' films$', cat) is None and re.search(' clips$', cat) is None
            and re.search(' novels$', cat) is None
            and re.search('^Film[s]? ', cat) is None
            and re.search('^Screenplays ', cat) is None
        ]

        categories.sort()
        categories_freq = [(cat[0], len(list(cat[1])))
                           for cat in itertools.groupby(categories)]
        categories_freq.sort(key=lambda x: x[1], reverse=True)
        # Create knowledge domain
        knowledge_domain = [x[0] for x in categories_freq[0:20]]

        # Search answers inside knowledge domain
        # { "term": { "categories": categories[0] }}
        es_domain_query = [{
            "match": {
                "categories": cat
            }
        } for cat in knowledge_domain]

        hits = []
        scores = []
        fuzziness_arr = []
Example #32
def analyzeObject(filePath):

    global faceList, edgeList, vertexList
    global minX, maxX, minY, maxY, minZ, maxZ

    faceList.clear()
    edgeList.clear()
    vertexList.clear()
    surfaceArea = 0
    volume = 0
    minX = math.inf
    maxX = -math.inf
    minY = math.inf
    maxY = -math.inf
    minZ = math.inf
    maxZ = -math.inf

    # Get file name and handle incorrect/missing file names
    try:
        with open(filePath, 'r') as file:
            text = file.read().splitlines()
            myCoral = Coral(filePath)
            print("Coral file " + myCoral.coralName + " found!")
    except IOError as e:
        print(filePath + " not found, please try another file:")
        return None

    # Start timer to analyze performance
    start_time = time.time()

    print("Building list of all vertices and faces.")
    # Build lists of all vertices and faces
    for i in range(0, len(text)):
        if len(text[i]) > 1:
            if text[i][0] == 'v' and ' ' == text[i][1]:
                vertexList.append(getListCoord(text[i], "v "))
            elif text[i][0] == 'f':
                faceList.append(text[i])
    myCoral.vertexList = vertexList

    print("Calculating area and volume.")
    for i in range(0, len(faceList)):
        # Break each face into the label numbers of each vertex
        vertex1 = faceList[i].lstrip('f ').split(' ')[0].split('/')[0]
        vertex2 = faceList[i].lstrip('f ').split(' ')[1].split('/')[0]
        vertex3 = faceList[i].lstrip('f ').split(' ')[2].split('/')[0]

        # Surface area calculated by summing area of each triangular face
        surfaceArea += triArea(vertex1, vertex2, vertex3)

        # Volume calculated by the sum of signed volumes of tetrahedrons. Each tetrahedron is formed by the three vertices of a face on the object; the fourth point is the origin
        volume += findtetraVolume(vertex1, vertex2, vertex3)

        #We define each edge as a list of two vertices and sort so that duplicates can easily be deleted later
        edge1 = sorted([vertex1, vertex2])
        edge2 = sorted([vertex2, vertex3])
        edge3 = sorted([vertex3, vertex1])

        # Add each edge to an edgeList
        edgeList.append(edge1)
        edgeList.append(edge3)
        edgeList.append(edge2)

    #Remove duplicates of edges
    edgeList.sort()
    edgeList = list(edgeList for edgeList, _ in itertools.groupby(edgeList))

    #Euler's Formula
    holes = int(-(len(vertexList) - len(edgeList) + len(faceList)) / 2 + 1)

    # Bounding Box distances
    length = abs(maxX - minX)
    width = abs(maxY - minY)
    height = abs(maxZ - minZ)
    boxDimensions = [minX, minY, minZ, maxX, maxY, maxZ]

    # Set coral object attributes
    if holes == 1:
        myCoral.numHoles = 1
    elif holes == 0:
        myCoral.numHoles = 0
    else:
        myCoral.numHoles = holes
    myCoral.numEdges = len(edgeList)
    myCoral.numVertices = len(vertexList)
    myCoral.numFaces = len(faceList)
    myCoral.boxDimensions = boxDimensions
    myCoral.surfaceArea = surfaceArea
    myCoral.volume = volume
    myCoral.analysisTime = time.time() - start_time

    #	Some print statements to help with visualizing if writing to document doesn't work
    print("\n\nThere are " + str(len(vertexList)) + " vertices.")
    print("There are " + str(len(edgeList)) + " edges.")
    print("There are " + str(len(faceList)) + " faces.")
    if holes == 1:
        print("There is one hole in the object.")
    elif holes == 0:
        print("There are no holes in the object.")
    else:
        print("There are " + str(holes) + " holes in the object.")
    print("\nThe bounding box dimensions are {:,.2f}".format(length) +
          "mm x " + "{:,.2f}".format(width) + "mm x " +
          "{:,.2f}".format(height) + "mm.")
    print("The surface area is {:,.3f}".format(surfaceArea) + " square mm.")
    print("The volume is {:,.3f}".format(volume) + " cubic mm.")

    print("\n\n--- Elapsed time: {:,.2f}".format(time.time() - start_time) +
          " seconds ---")

    print("Calculating fractal dimension.")

    # Calculate fractal dimension using bucket fractal dimension
    myFD, myX, myY = findBucketFD(vertexList, myCoral.findBoundBox())
    myCoral.myFD = myFD
    myCoral.myXY = myX, myY
    myCoral.plotMyFD()
    myCoral.plotToPlateau()
    #myCoral.writeXYtoFile()

    # Using Jessica's fractal dimension
    fileFD, fileX, fileY = findFromFDFile(myCoral.jessicafilePath)
    myCoral.fileFD = fileFD
    myCoral.fileXY = fileX, fileY
    #myCoral.plotFileFD()

    return myCoral
Example #33
def ipartition_by(f, seq):
    for g, items in groupby(seq, f):
        yield items
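Minimal usage sketch: each yielded group must be consumed before the next one is requested, which the loop below does by listing it immediately (toy input).

for part in ipartition_by(lambda x: x % 2, [1, 3, 2, 4, 5]):
    print(list(part))  # [1, 3], then [2, 4], then [5]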
Example #34
    def stage(workflow_name, stage_name):
        # these are the column names that appear in the screen
        colnames = [
            "id",
            "task",
            "successful",
            "status",
            "drm_status",
            "drm_jobID",
            "attempts",
            "submitted_on",
            "finished_on",
            "wall_time",
        ]
        # this indicates if this column can be used for sorting and searching. The names match SQL column names.
        names_internal = [
            "id",
            "params",
            "successful",
            "_status",
            False,
            "drm_jobID",
            "attempt",
            "submitted_on",
            "finished_on",
            "wall_time",
        ]

        in_page = request.args.get("in_page", 40, type=int)
        page = request.args.get("page", 1, type=int)
        keyword = request.args.get("keyword", "", type=str)
        sorting = request.args.get("sorting", None, type=str)
        order = request.args.get("order", None, type=str)

        ex = session.query(Workflow).filter_by(name=workflow_name).one()
        stage = session.query(Stage).filter_by(workflow_id=ex.id,
                                               name=stage_name).one()
        if stage is None:
            return abort(404)
        from sqlalchemy import text

        tasks = session.query(Task).filter_by(stage_id=stage.id)
        # search keyword
        if keyword == "":
            tasks_searched = tasks
        else:
            pattern = "%" + keyword.replace("'", "''") + "%"
            tasks_searched = tasks.filter(
                or_(*[
                    text(f"{field} LIKE '{pattern}'") if field else None
                    for field in names_internal
                ]))

        # sort
        tasks_sorted = tasks_searched
        if sorting is not None:
            if order == "desc":
                tasks_sorted = tasks_searched.order_by(
                    desc(getattr(Task, sorting)))
            elif order == "asc":
                tasks_sorted = tasks_searched.order_by(
                    asc(getattr(Task, sorting)))

        tasks_paginated = tasks_sorted[(page - 1) * in_page:page * in_page]

        try:
            n = tasks_searched.count()
            max_page = n // in_page + (1 if n % in_page > 0 else 0)
        except ZeroDivisionError:  # no tasks found after search
            max_page = 1

        # urls for page navigation
        first_url = (url_for(
            "cosmos.stage_query",
            workflow_name=workflow_name,
            stage_name=stage_name,
            old_page=1,
            old_keyword=keyword,
            old_in_page=in_page,
            sorting=sorting,
            order=order,
        ) if page != 1 else None)
        prev_url = (url_for(
            "cosmos.stage_query",
            workflow_name=workflow_name,
            stage_name=stage_name,
            old_page=page - 1,
            old_keyword=keyword,
            old_in_page=in_page,
            sorting=sorting,
            order=order,
        ) if page >= 2 else None)
        next_url = (url_for(
            "cosmos.stage_query",
            workflow_name=workflow_name,
            stage_name=stage_name,
            old_page=page + 1,
            old_keyword=keyword,
            old_in_page=in_page,
            sorting=sorting,
            order=order,
        ) if page < max_page else None)
        last_url = (url_for(
            "cosmos.stage_query",
            workflow_name=workflow_name,
            stage_name=stage_name,
            old_page=max_page,
            old_keyword=keyword,
            old_in_page=in_page,
            sorting=sorting,
            order=order,
        ) if page != max_page else None)

        # this will change only the url for the column currently used for sorting
        order_cycle = {None: "asc", "asc": "desc", "desc": None}
        ordering_for_urls = {
            colname: order_cycle[order] if good == sorting else "asc"
            for colname, good in zip(colnames, names_internal)
        }
        ordering_urls = {
            colname: url_for(
                f"cosmos.stage",
                workflow_name=workflow_name,
                stage_name=stage_name,
                in_page=in_page,
                page=page,
                keyword=keyword,
                sorting=good,
                order=ordering_for_urls[colname],
            ) if good else None
            for colname, good in zip(colnames, names_internal)
        }

        jm = JobManager(get_submit_args=None, logger=None)

        f = attrgetter("drm")
        drm_statuses = {}
        for drm, tasks in it.groupby(sorted(tasks_paginated, key=f), f):
            drm_statuses.update(jm.get_drm(drm).drm_statuses(list(tasks)))

        url_query = url_for(
            "cosmos.stage_query",
            old_page=page,
            old_keyword=keyword,
            sorting=sorting,
            order=order,
            workflow_name=workflow_name,
            stage_name=stage_name,
            old_in_page=in_page,
        )

        return render_template(
            "cosmos/stage.html",
            stage=stage,
            drm_statuses=drm_statuses,
            in_page=in_page,
            tasks_on_page=tasks_paginated,
            max_page=max_page,
            colnames=colnames,
            ordering_urls=ordering_urls,
            page=page,
            url_query=url_query,
            first_url=first_url,
            prev_url=prev_url,
            next_url=next_url,
            last_url=last_url,
            workflow_name=workflow_name,
            stage_name=stage_name,
            keyword=keyword,
        )
Example #35
# taste {'0': 117872, '1': 384200, '2': 95771, '3': 61508}
# region {'0': 438760, '1': 220591}
###############################################################################################################
find_distribution = False
all_job = ['timeliness','emotion','taste','region']
if find_distribution:
    content_iter = ExqUtils.load_file_as_iter(p)
    ori_distribution = {'timeliness': {}, 'emotion': {}, 'region': {}, 'taste': {}}
    while True:
        data = list(itertools.islice(content_iter, 10000 * 10))
        if len(data) > 0:
            json_res = [json.loads(i.strip()) for i in data]
            # sample_list = [c['title'] + ". " + c['text'] for c in content]
            for job in all_job:
                job_label_list = np.asarray(sorted([str(c[job]) for c in json_res]))
                for k, g in itertools.groupby(job_label_list):
                    ori_distribution[job].update({k: len(list(g)) + ori_distribution[job].get(k, 0)})
        else:
            break
    for job in all_job:
        print(job,ori_distribution[job])

####################
# Prepare the (classification) training samples
# {'1': 39566, '2': 456327, '3': 64505, '4': 17625, '5': 4698, '6': 2979, '7': 24271, '8': 49380}
####################
if prepare_samples:
    print("加载各样本")
    content_iter = ExqUtils.load_file_as_iter(p)
    distribution = {}
    print("清空文件")
Example #36
def RunLengthEncoding(input_string):
		return [(len(list(j)), i) for i,j in groupby(input_string)]
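A quick check of the encoder above on a toy string:

print(RunLengthEncoding("aaabccdd"))  # [(3, 'a'), (1, 'b'), (2, 'c'), (2, 'd')]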
Example #37
def asyn_lpa_communities(G, weight=None):
    """Returns communities in ``G`` as detected by asynchronous label
    propagation.

    The asynchronous label propagation algorithm is described in
    [1]_. The algorithm is probabilistic and the found communities may
    vary on different executions.

    The algorithm proceeds as follows. After initializing each node with
    a unique label, the algorithm repeatedly sets the label of a node to
    be the label that appears most frequently among that node's
    neighbors. The algorithm halts when each node has the label that
    appears most frequently among its neighbors. The algorithm is
    asynchronous because each node is updated without waiting for
    updates on the remaining nodes.

    This generalized version of the algorithm in [1]_ accepts edge
    weights.

    Parameters
    ----------
    G : Graph

    weight : string
        The edge attribute representing the weight of an edge. If
        ``None``, each edge is assumed to have weight one. In this
        algorithm, the weight of an edge is used in determining the
        frequency with which a label appears among the neighbors of a
        node: a higher weight means the label appears more often.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Notes
    ------
    Edge weight attributes must be numerical.

    References
    ----------
    .. [1] Raghavan, Usha Nandini, Réka Albert, and Soundar Kumara. "Near
           linear time algorithm to detect community structures in large-scale
           networks." Physical Review E 76.3 (2007): 036106.
    """

    labels = {n: i for i, n in enumerate(G)}
    cont = True
    while cont:
        cont = False
        nodes = list(G)
        random.shuffle(nodes)
        # Calculate the label for each node
        for node in nodes:
            if len(G[node]) < 1:
                continue

            # Get label frequencies. Depending on the order in which nodes are
            # processed, some will be at iteration t and others at t-1, which
            # makes the algorithm asynchronous.
            label_freq = Counter({labels[v]: G.edge[v][node][weight]
                                  if weight else 1 for v in G[node]})

            # Choose the label with the highest frequency. If more than one
            # label has the highest frequency, choose one randomly.
            max_freq = max(label_freq.values())
            best_labels = [label for label, freq in label_freq.items()
                           if freq == max_freq]
            new_label = random.choice(best_labels)
            labels[node] = new_label
            # Continue until all nodes have a label that is better than other
            # neighbour labels (only one label has max_freq for each node).
            cont = cont or len(best_labels) > 1

    return (set(v) for k, v in groupby(sorted(labels, key=labels.get),
                                       key=labels.get))
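A hedged usage sketch: the function above reads G.edge[v][node], so it assumes an older networkx (1.x) Graph API; the seed is only there to make the probabilistic result repeatable.

import random
import networkx as nx

random.seed(0)
G = nx.karate_club_graph()
communities = list(asyn_lpa_communities(G))
print(len(communities), "communities found")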
Example #38
import itertools
num = [[10, 20], [40], [30, 56, 25], [10, 20], [33], [40]]
print("Original List", num)
num.sort()
new_num = list(num for num, _ in itertools.groupby(num))
print("New List", new_num)

Example #39
    def get_availability(self):
        """
        Queries the current client for information on what stations are
        available given the spatial and temporal restrictions.
        """
        # Check if stations needs to be filtered after downloading or if the
        # restrictions one can impose with the FDSN webservices queries are
        # enough. This depends on the domain definition.
        try:
            self.domain.is_in_domain(0, 0)
            needs_filtering = True
        except NotImplementedError:
            needs_filtering = False

        arguments = {
            "network": self.restrictions.network,
            "station": self.restrictions.station,
            "location": self.restrictions.location,
            "channel": self.restrictions.channel,
            "starttime": self.restrictions.starttime,
            "endtime": self.restrictions.endtime,
            # Request at the channel level.
            "level": "channel"
        }
        # Add the domain specific query parameters.
        arguments.update(self.domain.get_query_parameters())

        # Check the capabilities of the service and see what is the most
        # appropriate way of acquiring availability information. Some services
        # right now require manual overriding of what they claim to be
        # capable of.
        if "matchtimeseries" in self.client.services["station"]:
            arguments["matchtimeseries"] = True
            if "format" in self.client.services["station"]:
                arguments["format"] = "text"
            self.is_availability_reliable = True
        else:
            if "format" in self.client.services["station"]:
                arguments["format"] = "text"
            self.is_availability_reliable = False

        if self.is_availability_reliable:
            self.logger.info("Client '%s' - Requesting reliable "
                             "availability." % self.client_name)
        else:
            self.logger.info(
                "Client '%s' - Requesting unreliable availability." %
                self.client_name)

        try:
            start = time.time()
            inv = self.client.get_stations(**arguments)
            end = time.time()
        except utils.ERRORS as e:
            if "no data available" in str(e).lower():
                self.logger.info(
                    "Client '%s' - No data available for request." %
                    self.client_name)
                return
            self.logger.error(
                "Client '%s' - Failed getting availability: %s",
                self.client_name, str(e))
            return
        self.logger.info("Client '%s' - Successfully requested availability "
                         "(%.2f seconds)" % (self.client_name, end - start))

        # Get the time intervals from the restrictions.
        intervals = [TimeInterval(start=_i[0], end=_i[1])
                     for _i in self.restrictions]

        for network in inv:
            for station in network:
                # Skip the station if it is not in the desired domain.
                if needs_filtering is True and \
                        not self.domain.is_in_domain(station.latitude,
                                                     station.longitude):
                    continue

                channels = []
                for channel in station.channels:
                    # Remove channels that somehow slipped past the temporal
                    # constraints due to weird behaviour from the data center.
                    if (channel.start_date > self.restrictions.endtime) or \
                            (channel.end_date < self.restrictions.starttime):
                        continue
                    channels.append(Channel(
                        location=channel.location_code, channel=channel.code,
                        intervals=copy.deepcopy(intervals)))

                # Group by locations and apply the channel priority filter to
                # each.
                filtered_channels = []

                def get_loc(x):
                    return x.location

                for location, _channels in itertools.groupby(
                        sorted(channels, key=get_loc), get_loc):
                    filtered_channels.extend(utils.filter_channel_priority(
                        list(_channels), key="channel",
                        priorities=self.restrictions.channel_priorities))
                channels = filtered_channels

                # Filter to remove unwanted locations according to the priority
                # list.
                channels = utils.filter_channel_priority(
                    channels, key="location",
                    priorities=self.restrictions.location_priorities)

                if not channels:
                    continue

                self.stations[(network.code, station.code)] = Station(
                    network=network.code,
                    station=station.code,
                    latitude=station.latitude,
                    longitude=station.longitude,
                    channels=channels)
        self.logger.info("Client '%s' - Found %i stations (%i channels)." % (
            self.client_name, len(self.stations),
            sum([len(_i.channels) for _i in self.stations.values()])))
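# The loop above groups channels by location code and keeps only the preferred
# ones per location. A minimal standalone sketch of that pattern, using plain
# dicts and a simplified, hypothetical priority picker in place of the real
# utils.filter_channel_priority helper:
from itertools import groupby

def pick_by_priority(channels, key, priorities):
    # Hypothetical stand-in: return the channels matching the first priority
    # pattern that has any match; otherwise return everything unchanged.
    for pattern in priorities:
        prefix = pattern.rstrip("*")
        matches = [c for c in channels if c[key].startswith(prefix)]
        if matches:
            return matches
    return channels

channels = [
    {"location": "00", "channel": "BHZ"},
    {"location": "00", "channel": "LHZ"},
    {"location": "10", "channel": "BHZ"},
]
filtered = []
for location, group in groupby(sorted(channels, key=lambda c: c["location"]),
                               key=lambda c: c["location"]):
    filtered.extend(pick_by_priority(list(group), key="channel",
                                     priorities=["BH*", "LH*"]))
print(filtered)  # one preferred channel selection per location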
Example #40
0
def ReconstructType(Field, EA):
    #
    # Get at the metadata for *this* instruction alone
    #
    Traces = GetHeapTraces(Field, EA)
    Metadata = tuple(Meta(Trace[Field]) for Trace in Traces)
    (Size, Offset, Frames) = EnsureHeapMetadataHomogeneity(Metadata)
    StructId = AskUserForStructOfSize(Size)
    BaseAddresses = set(M.Heap.Base for M in Metadata)

    if StructId == ida.BADNODE:
        return

    #
    # Presumably, this instruction was hit multiple times, and may have
    # operated on several different allocations of the exact same variety.
    #
    # Given this data, find *all* instructions which interacted with
    # the exact same data.  Specifically, we want to get at the traces.
    #
    for F in Frames:
        print ", ".join(str(_) for _ in Frames)
    # print "Analyzing allocations of size %#x with call frame..." % Size
    # print '\n'.join('    %s' % F for F in Frames)

    RelevantTraces = tuple(TracesForHeapAllocation(BaseAddresses, Size,
                                                   Frames))
    UniqueOffset = lambda T: T['Address']['Heap']['Offset']

    print "Found %#x traces" % len(RelevantTraces)

    #
    # Group all of the fields by their offset from the base
    # of the heap allocations
    #
    for TraceOffset, Traces in itertools.groupby(RelevantTraces, UniqueOffset):
        # N.B. itertools.groupby() group can only be iterated once
        #      so we have to save it off.
        Traces = tuple(Traces)
        FieldFromTraces(StructId, TraceOffset, Traces)

        #
        # Group the traces by instruction, and set the structure type
        # on the memory operand if there isn't one already.
        #
        for Trace in Traces:  # o_displ
            EA = Trace['IP']
            Operands = {0: ida.GetOpType(EA, 0), 1: ida.GetOpType(EA, 1)}

            # We want to find the 'Address' operand, which will
            # be of type displacement, memory, or phrase.
            OpMem = next(k for k, v in Operands.items()
                         if v in (ida.o_displ, ida.o_mem, ida.o_phrase))

            OpValue = ida.GetOperandValue(EA, OpMem)
            OpOff = 0

            # o_phrase comes up for some instructions that look like:
            #       mov reg, [reg2]
            # and OpValue will then be an index for the list returned by GetRegisterList()
            # instead of '0'.
            if Operands[OpMem] == ida.o_phrase:
                OpOff = TraceOffset
            else:
                OpOff = TraceOffset - OpValue
                print "%x = %x - %x" % (OpOff, TraceOffset, OpValue)

            print "OpStroffEx(%x, %x, %x, %x)" % (EA, OpMem, StructId, OpOff)
            ida.OpStroffEx(EA, OpMem, StructId, OpOff)
Example #41
0
def compress_homopolymer(seq):
    return ''.join(x[0] for x in groupby(list(seq)))
Example #42
0
def unique_in_order(iterable):
    return [k for (k, _) in groupby(iterable)]
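# Examples #41 and #42 rely on the same property of groupby: consecutive equal
# items collapse into a single group key. A quick self-contained usage check:
from itertools import groupby

def compress_homopolymer(seq):
    return ''.join(x[0] for x in groupby(list(seq)))

def unique_in_order(iterable):
    return [k for (k, _) in groupby(iterable)]

print(compress_homopolymer("AAACCGTT"))    # -> "ACGT"
print(unique_in_order("AAAABBBCCDAABBB"))  # -> ['A', 'B', 'C', 'D', 'A', 'B']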
Example #43
0
    def run(self):
        # Set up paths to write to config file
        install_dir = self.install_dir
        install_logdir = '/var/log'
        if self.user or self.home:
            install_sysconfdir = os.path.join(install_dir, 'etc')
        elif os.name == 'posix' and install_dir in ('/', '/usr'):
            install_sysconfdir = '/etc'
        else:
            install_sysconfdir = 'scripts\\etc\\mysql'

        if not self.data_files:
            return
        # Go over all entries in data_files and process it if needed
        for df in self.data_files:
            # Figure out what the entry contain and collect a list of files.
            if isinstance(df, str):
                # This was just a file name, so it will be installed
                # in the install_dir location. This is a copy of the
                # behaviour inside distutils intall_data.
                directory = install_dir
                filenames = [df]
            else:
                directory = df[0]
                filenames = df[1]

            # Process all the files for the entry and build a list of
            # tuples (directory, file)
            data_files = []
            for filename in filenames:
                # It was a config file template, add install
                # directories to the config file.
                if fnmatch.fnmatch(filename, 'data/*.cfg.in'):
                    config = ConfigParser.RawConfigParser({
                        'prefix': '',  # custom install_dir,
                        'logdir': install_logdir,
                        'sysconfdir': install_sysconfdir,
                    })
                    config.readfp(open(filename))
                    filename = os.path.splitext(filename)[0]
                    config.write(open(filename, "w"))
                    # change directory 'fabric' to 'mysql'
                    directory = os.path.join(install_sysconfdir, 'mysql')
                if os.name == 'nt':
                    directory = install_sysconfdir
                data_files.append((directory, filename))

        # Re-construct the data_files entry from what was provided by
        # merging all tuples with same directory and provide a list of
        # files as second item, e.g.:
        #   [('foo', 1), ('bar', 2), ('foo', 3), ('foo', 4), ('bar', 5)]
        #   --> [('bar', [2, 5]), ('foo', [1, 3, 4])]
        data_files.sort()
        data_files = [
            (d, [f[1] for f in fs]) for d, fs in groupby(data_files,
                                                         key=lambda x: x[0])
        ]
        self.data_files = data_files
        log.info("package--> self.data_files {0}".format(self.data_files))
        log.info("package.py--> self.data_files {0}".format(self.data_files))
        _install_data.run(self)
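# The comment in run() above describes merging (directory, file) tuples that
# share a directory; that is a plain sort followed by groupby. A self-contained
# sketch of just that step:
from itertools import groupby

data_files = [('foo', 1), ('bar', 2), ('foo', 3), ('foo', 4), ('bar', 5)]
data_files.sort()
merged = [(d, [f[1] for f in fs])
          for d, fs in groupby(data_files, key=lambda x: x[0])]
print(merged)  # [('bar', [2, 5]), ('foo', [1, 3, 4])]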
Example #44
0
def stree_schedule(clusters):
    """
    Arrange an iterable of Clusters into a ScheduleTree.
    """
    stree = ScheduleTree()

    prev = Cluster(None)
    mapper = DefaultOrderedDict(lambda: Bunch(top=None, bottom=None))

    def reuse_metadata(c0, c1, d):
        return (c0.guards.get(d) == c1.guards.get(d)
                and c0.syncs.get(d) == c1.syncs.get(d))

    def attach_metadata(cluster, d, tip):
        if d in cluster.guards:
            tip = NodeConditional(cluster.guards[d], tip)
        if d in cluster.syncs:
            tip = NodeSync(cluster.syncs[d], tip)
        return tip

    for c in clusters:
        index = 0

        # Reuse or add in any Conditionals and Syncs outside of the outermost Iteration
        if not reuse_metadata(c, prev, None):
            tip = attach_metadata(c, None, stree)
            maybe_reusable = []
        else:
            try:
                tip = mapper[prev.itintervals[index]].top.parent
            except IndexError:
                tip = stree
            maybe_reusable = prev.itintervals

        for it0, it1 in zip(c.itintervals, maybe_reusable):
            if it0 != it1:
                break
            index += 1

            d = it0.dim

            # The reused sub-trees might acquire new sub-iterators as well as
            # new properties
            mapper[it0].top.ispace = IterationSpace.union(
                mapper[it0].top.ispace, c.ispace.project([d]))
            mapper[it0].top.properties = normalize_properties(
                mapper[it0].top.properties, c.properties[it0.dim])

            # Different guards or SyncOps cannot further be nested
            if not reuse_metadata(c, prev, d):
                tip = mapper[it0].top
                tip = attach_metadata(c, d, tip)
                mapper[it0].bottom = tip
                break
            else:
                tip = mapper[it0].bottom

        # Nested sub-trees, instead, will not be used anymore
        for it in prev.itintervals[index:]:
            mapper.pop(it)

        # Add in Iterations, Conditionals, and Syncs
        for it in c.itintervals[index:]:
            d = it.dim
            tip = NodeIteration(c.ispace.project([d]), tip,
                                c.properties.get(d, ()))
            mapper[it].top = tip
            tip = attach_metadata(c, d, tip)
            mapper[it].bottom = tip

        # Add in Expressions
        exprs = []
        for conditionals, g in groupby(c.exprs, key=lambda e: e.conditionals):
            exprs = list(g)

            # Indirect ConditionalDimensions induce expression-level guards
            if conditionals:
                guard = And(*conditionals.values(), evaluate=False)
                parent = NodeConditional(guard, tip)
            else:
                parent = tip

            NodeExprs(exprs, c.ispace, c.dspace, c.ops, c.traffic, parent)

        # Prepare for next iteration
        prev = c

    return stree
Example #45
0
def _serialize_report(user, problem_instances, test_groups):
    """Generates a dictionary representing a single report.


       :type user: :cls:`django.contrib.auth.User`
       :param user: user to generate the report for
       :type problem_instances: list of
                                 :cls:`oioioi.contests.ProblemInstance`
       :param problem_instances: problem instances to include in the report
       :type test_groups: dict(:cls:`oioioi.contests.ProblemInstance`
                           -> list of str)
       :param test_groups: dictionary mapping problem instances into lists
                           of names of test groups to include
    """

    resultsets = []
    total_score = None

    results = UserResultForProblem.objects.filter(
            user=user,
            problem_instance__in=list(problem_instances),
            submission_report__isnull=False)
    for r in results:
        problem_instance = r.problem_instance
        submission_report = r.submission_report
        submission = submission_report.submission
        source_file = submission.programsubmission.source_file
        groups = list(test_groups[problem_instance])

        try:
            compilation_report = CompilationReport.objects \
                    .get(submission_report=submission_report)
        except CompilationReport.DoesNotExist:
            compilation_report = None

        try:
            test_reports = TestReport.objects \
                    .filter(submission_report__submission=submission) \
                    .filter(submission_report__status='ACTIVE') \
                    .filter(submission_report__kind__in=['INITIAL',
                                                         'NORMAL']) \
                    .filter(test_group__in=groups) \
                    .order_by('test__kind', 'test__order', 'test_name')
        except TestReport.DoesNotExist:
            test_reports = []

        group_reports = GroupReport.objects \
                .filter(submission_report__submission=submission) \
                .filter(submission_report__status='ACTIVE') \
                .filter(submission_report__kind__in=['INITIAL', 'NORMAL']) \
                .filter(group__in=groups)
        group_reports = dict((g.group, g) for g in group_reports)
        groups = []
        for group_name, tests in itertools.groupby(test_reports,
                attrgetter('test_group')):
            groups.append({'tests': list(tests),
                'report': group_reports[group_name]})

        problem_score = None
        max_problem_score = None
        for group in groups:
            group_score = group['report'].score
            group_max_score = group['report'].max_score

            if problem_score is None:
                problem_score = group_score
            elif group_score is not None:
                problem_score += group_score

            if max_problem_score is None:
                max_problem_score = group_max_score
            elif group_max_score is not None:
                max_problem_score += group_max_score

        resultsets.append(dict(
            result=r,
            score=problem_score,
            max_score=max_problem_score,
            compilation_report=compilation_report,
            groups=groups,
            code=source_file.read(),
            codefile=source_file.file.name
        ))
        source_file.close()
        if total_score is None:
            total_score = problem_score
        elif problem_score is not None:
            total_score += problem_score
    return {
        'user': user,
        'resultsets': resultsets,
        'sum': total_score,
    }
Example #46
0
def main():
    trips_csv = DictReader(file(DATA_ROOT + 'trips.txt'))
    stops_csv = DictReader(file(DATA_ROOT + 'stops.txt'))
    stop_times_csv = DictReader(file(DATA_ROOT + 'stop_times.txt'))
    routes_csv = DictReader(file(DATA_ROOT + 'routes.txt'))

    gexf = GEXF()

    routes = dict()
    for route in routes_csv:
        if route['route_type'] in CONVERT_ROUTE_TYPES:
            routes[route['route_id']] = route
    print 'routes', len(routes)

    trips = dict()
    for trip in trips_csv:
        if trip['route_id'] in routes:
            trip['color'] = routes[trip['route_id']]['route_color']
            trips[trip['trip_id']] = trip
    print 'trips', len(trips)

    stops = set()
    edges = dict()
    for trip_id, stop_time_iter in groupby(
            stop_times_csv, lambda stop_time: stop_time['trip_id']):
        if trip_id in trips:
            trip = trips[trip_id]
            prev_stop = stop_time_iter.next()['stop_id']
            stops.add(prev_stop)
            for stop_time in stop_time_iter:
                stop = stop_time['stop_id']
                edge = (prev_stop, stop)
                edges[edge] = trip['color']
                stops.add(stop)
                prev_stop = stop
    print 'stops', len(stops)
    print 'edges', len(edges)

    #stop_map = dict()
    stops_used = set(DISCARD_STATIONS)
    for stop in stops_csv:
        if stop['stop_id'] in stops:
            stop_id = stop['stop_id']
            name = stop['stop_name']
            lat = stop['stop_lat']
            lon = stop['stop_lon']
            #if name not in stops_used:
            if stop_id not in stops_used:
                gexf.add_node(stop_id, name, lon, lat)
                stops_used.add(stop_id)

    edges_used = set()
    for (start_stop_id, end_stop_id), color in edges.iteritems():
        #start_stop_name = stop_map[start_stop_id]
        #end_stop_name = stop_map[end_stop_id]
        #start_stop_id = get_stop_id(start_stop_id)
        #end_stop_id = get_stop_id(end_stop_id)
        #if start_stop_name in DISCARD_STATIONS or end_stop_name in DISCARD_STATIONS:
        #    continue
        edge = min((start_stop_id, end_stop_id), (end_stop_id, start_stop_id))
        if edge not in edges_used:
            gexf.add_edge(start_stop_id, end_stop_id, color)
            edges_used.add(edge)

    gexf.write(file('out.gexf', 'w'))
Example #47
0
def process_translations(content_list, order_by=None):
    """ Finds translation and returns them.

    Returns a tuple with two lists (index, translations).  Index list includes
    items in default language or items which have no variant in default
    language. Items with the `translation` metadata set to something else than
    `False` or `false` will be used as translations, unless all the items with
    the same slug have that metadata.

    For each content_list item, sets the 'translations' attribute.

    order_by can be a string of an attribute or sorting function. If order_by
    is defined, content will be ordered by that attribute or sorting function.
    By default, content is ordered by slug.

    Different content types can have default order_by attributes defined
    in settings, e.g. PAGES_ORDER_BY='sort-order', in which case `sort-order`
    should be a defined metadata attribute in each page.
    """
    content_list.sort(key=attrgetter('slug'))
    grouped_by_slugs = groupby(content_list, attrgetter('slug'))
    index = []
    translations = []

    for slug, items in grouped_by_slugs:
        items = list(items)
        # items with `translation` metadata will be used as translations…
        default_lang_items = list(filter(
                lambda i: i.metadata.get('translation', 'false').lower()
                        == 'false',
                items))
        # …unless all items with that slug are translations
        if not default_lang_items:
            default_lang_items = items

        # display warnings if several items have the same lang
        for lang, lang_items in groupby(items, attrgetter('lang')):
            lang_items = list(lang_items)
            len_ = len(lang_items)
            if len_ > 1:
                logger.warning('There are %s variants of "%s" with lang %s',
                    len_, slug, lang)
                for x in lang_items:
                    logger.warning('\t%s', x.source_path)

        # find items with default language
        default_lang_items = list(filter(attrgetter('in_default_lang'),
                default_lang_items))

        # if there is no article with default language, take another one
        if not default_lang_items:
            default_lang_items = items[:1]

        if not slug:
            logger.warning(
                    'empty slug for %s. '
                    'You can fix this by adding a title or a slug to your '
                    'content',
                    default_lang_items[0].source_path)
        index.extend(default_lang_items)
        translations.extend([x for x in items if x not in default_lang_items])
        for a in items:
            a.translations = [x for x in items if x != a]

    if order_by:
        if callable(order_by):
            try:
                index.sort(key=order_by)
            except Exception:
                logger.error('Error sorting with function %s', order_by)
        elif isinstance(order_by, six.string_types):
            if order_by.startswith('reversed-'):
                order_reversed = True
                order_by = order_by.replace('reversed-', '', 1)
            else:
                order_reversed = False

            if order_by == 'basename':
                index.sort(key=lambda x: os.path.basename(x.source_path or ''),
                           reverse=order_reversed)
            # already sorted by slug, no need to sort again
            elif not (order_by == 'slug' and not order_reversed):
                try:
                    index.sort(key=attrgetter(order_by),
                               reverse=order_reversed)
                except AttributeError:
                    logger.warning('There is no "%s" attribute in the item '
                        'metadata. Defaulting to slug order.', order_by)
        else:
            logger.warning('Invalid *_ORDER_BY setting (%s). '
                'Valid options are strings and functions.', order_by)

    return index, translations
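# The core of process_translations() is grouping content items by slug and
# splitting each group into default-language items and translations. A toy
# illustration with a hypothetical stand-in for a content item:
from collections import namedtuple
from itertools import groupby
from operator import attrgetter

Item = namedtuple("Item", "slug lang in_default_lang")

items = [
    Item("intro", "en", True),
    Item("intro", "fr", False),
    Item("about", "en", True),
]
items.sort(key=attrgetter("slug"))  # groupby needs slug-sorted input
for slug, group in groupby(items, attrgetter("slug")):
    group = list(group)
    default = [i for i in group if i.in_default_lang] or group[:1]
    translations = [i for i in group if i not in default]
    print(slug, "->", [i.lang for i in default],
          "translations:", [i.lang for i in translations])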
Example #48
0
    alf_path = '/home/mic/Downloads/ZM_1735_2019-08-01_001/mnt/s0/Data/Subjects/ZM_1735/2019-08-01/001/alf'
    # can be addressed as spikes['time'] or spikes.time
    spikes = alf.io.load_object(alf_path, 'spikes')
    clusters = alf.io.load_object(alf_path, 'clusters')
    channels = alf.io.load_object(alf_path, 'channels')
    trials = alf.io.load_object(alf_path, '_ibl_trials')

    # Print number of clusters for each brain region
    locDict_bothProbes = clusters['brainAcronyms']['brainAcronyms'].to_dict()
    cluster_idx_probe1 = np.unique(spikes['clusters'])
    locDict = {}
    for i in locDict_bothProbes:
        if i in cluster_idx_probe1:
            locDict[i] = locDict_bothProbes[i]
    print([(k, len(list(v)))
           for k, v in itertools.groupby(sorted(locDict.values()))])

    # set key parameters
    T_BIN = 0.1
    TRIALS_TO_PLOT = np.arange(20, 23)  # use the real trial numbers
    PROJECTED_DIMENSIONS = 3
    BEHAVIORAL_VARIABLE = 'choice'
    BRAIN_AREA = 'MB'  # that depends on the dataset

    # Reduce neural data to region of interest
    if BRAIN_AREA:
        locations = clusters['brainAcronyms']
        spikes = pd.DataFrame.from_dict(spikes)
        loc_idx = locations.loc[(
            locations['brainAcronyms'] == BRAIN_AREA)].index
        spikes = spikes[np.isin(spikes['clusters'], loc_idx)]
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--template-bam-file", dest="filename_genome_bam", type="string",
                      help="input bam file for header information [%default]")

    parser.add_option("-s", "--contigs-tsv-file", dest="filename_contigs", type="string",
                      help="filename with contig sizes [%default]")

    parser.add_option("-o", "--colour", dest="colour_mismatches", action="store_true",
                      help="mismatches will use colour differences (CM tag) [%default]")

    parser.add_option("-i", "--ignore-mismatches", dest="ignore_mismatches", action="store_true",
                      help="ignore mismatches [%default]")

    parser.add_option("-c", "--remove-contigs", dest="remove_contigs", type="string",
                      help="','-separated list of contigs to remove [%default]")

    parser.add_option("-f", "--force-output", dest="force", action="store_true",
                      help="force overwriting of existing files [%default]")

    parser.add_option("-u", "--unique", dest="unique", action="store_true",
                      help="remove reads not matching uniquely [%default]")

    parser.set_defaults(
        filename_genome_bam=None,
        filename_gtf=None,
        filename_mismapped=None,
        remove_contigs=None,
        force=False,
        unique=False,
        colour_mismatches=False,
        ignore_mismatches=False,
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    genomefile, referencenames, referencelengths = None, None, None

    if options.filename_genome_bam:
        genomefile = pysam.AlignmentFile(options.filename_genome_bam, "rb")
    elif options.filename_contigs:
        contigs = IOTools.ReadMap(IOTools.openFile(options.filename_contigs))
        data = list(zip(*list(contigs.items())))
        referencenames, referencelengths = data[0], list(map(int, data[1]))
    else:
        raise ValueError(
            "please provide either --template-bam-file or --contigs-tsv-file")

    infile = pysam.AlignmentFile("-", "rb")
    outfile = pysam.AlignmentFile("-", "wb", template=genomefile,
                                  referencenames=referencenames,
                                  referencelengths=referencelengths)

    if options.colour_mismatches:
        tag = "CM"
    else:
        tag = "NM"

    nambiguous = 0
    ninput = 0
    nunmapped = 0
    ncigar = 0
    nfull = 0
    noutput = 0

    contig2tid = dict([(y, x) for x, y in enumerate(outfile.references)])

    for qname, readgroup in itertools.groupby(infile, lambda x: x.qname):
        ninput += 1
        reads = list(readgroup)
        if reads[0].is_unmapped:
            nunmapped += 1
            continue

        # filter for best match
        best = min([x.opt(tag) for x in reads])
        reads = [x for x in reads if x.opt(tag) == best]
        if len(reads) > 1:
            nambiguous += 1
            continue

        read = reads[0]

        # reject complicated matches (indels, etc)
        # to simplify calculations below.
        if len(read.cigar) > 1:
            ncigar += 1
            continue

        # set NH flag to latest count
        t = dict(read.tags)
        t['NH'] = 1
        read.tags = list(t.items())

        sname = infile.getrname(read.tid)

        contig, first_exon_start, middle, last_exon_end, splice, strand = sname.split(
            "|")
        first_exon_end, last_exon_start = middle.split("-")
        first_exon_start, first_exon_end, last_exon_start, last_exon_end = list(map(int, (
            first_exon_start, first_exon_end, last_exon_start, last_exon_end)))
        first_exon_end += 1

        total = first_exon_end - first_exon_start + \
            last_exon_end - last_exon_start
        first_exon_length = first_exon_end - first_exon_start

        match1 = first_exon_length - read.pos
        intron_length = last_exon_start - first_exon_end
        match2 = read.qlen - match1

        # match lies fully in one exon - ignore
        if match1 <= 0 or match2 <= 0:
            nfull += 1
            continue

        # increment pos
        read.pos = first_exon_start + read.pos
        read.tid = contig2tid[contig]
        # 3 = BAM_CREF_SKIP
        read.cigar = [(0, match1), (3, intron_length), (0, match2)]

        outfile.write(read)

        noutput += 1

    outfile.close()
    if genomefile:
        genomefile.close()

    c = E.Counter()
    c.input = ninput
    c.output = noutput
    c.full = nfull
    c.cigar = ncigar
    c.ambiguous = nambiguous
    c.unmapped = nunmapped

    E.info("%s" % str(c))

    # write footer and output benchmark information.
    E.Stop()
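# The main loop above groups BAM records by query name (the input is assumed to
# be collated by qname) and keeps only reads whose best mismatch count is
# unique. A toy sketch of that filter, with a hypothetical Read tuple standing
# in for a pysam AlignedSegment:
from collections import namedtuple
from itertools import groupby

Read = namedtuple("Read", "qname nm")

reads = [Read("r1", 2), Read("r1", 0), Read("r2", 1), Read("r2", 1)]
for qname, group in groupby(reads, key=lambda r: r.qname):
    group = list(group)
    best = min(r.nm for r in group)
    best_reads = [r for r in group if r.nm == best]
    if len(best_reads) == 1:
        print(qname, "kept with NM =", best)  # r1 is kept
    else:
        print(qname, "ambiguous, skipped")    # r2 has two equally good hits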
Example #50
0
nms = [cat['name'] for cat in cats]  # category names
cat_id_to_name = {cat['id']: cat['name']
                  for cat in cats}  # category id to name mapping
cat_name_to_id = {cat['name']: cat['id']
                  for cat in cats}  # category name to id mapping
# print('COCO categories: \n{}'.format(' '.join(nms)))

nms = set([cat['supercategory'] for cat in cats])  # supercategory names
cat_to_supercat = {cat['name']: cat['supercategory'] for cat in cats}
cat_id_to_supercat = {cat['id']: cat['supercategory'] for cat in cats}
# print('COCO supercategories: \n{}'.format(' '.join(nms)))
# print len(nms)

# print supercategory and categories in each supercategory
supercat_to_cats = {}
for key, group in itertools.groupby(sorted([(sc, c) for (c, sc) in cat_to_supercat.items()]), lambda x: x[0]):
    lst = [thing[1] for thing in group]
    print(key, ":", ", ".join(lst))
    supercat_to_cats[key] = lst

colors = [(30, 144, 255), (255, 140, 0), (34, 139, 34), (255, 0, 0), (147, 112, 219), (139, 69, 19), (255, 20, 147), (128, 128, 128),
          (85, 107, 47), (0, 255, 255)]


def get_color(i):
    return colors[i % len(colors)]


# load and see image
img = coco.loadImgs([87058])[0] # make sure image ID exists in the dataset given to you.
# I = io.imread('%s/%s/%s'%(dataDir,dataType,img['file_name'])) # make sure data dir is correct
Example #51
0
import operator
import itertools
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import colorConverter
from matplotlib.collections import LineCollection
from matplotlib.colors import ListedColormap, BoundaryNorm

data = np.loadtxt('../../datasets/gatech_honeybee/sequence1.dat', dtype=np.dtype({'names':['x', 'y', 'mode'], 
  'formats':[np.double, np.double, 'S100']}))

segments = [np.array(list(g)) for k, g in itertools.groupby(data, key=operator.itemgetter(2))]

colormap = {'waggle':'-r', 'turn_left':'-g', 'turn_right':'-b'}

for segment in segments:
    plt.plot(segment['x'], segment['y'], colormap[segment['mode'][0]], linewidth=2.0)

plt.show()
Example #52
0
    def assert_guess_values(self, matrix, block, guesses, spin_flip=False):
        """
        Assert that the guesses correspond to the smallest
        diagonal values.
        """
        # Extract useful quantities
        mospaces = matrix.mospaces
        nCa = noa = mospaces.n_orbs_alpha("o1")
        nva = mospaces.n_orbs_alpha("v1")
        if mospaces.has_core_occupied_space:
            nCa = mospaces.n_orbs_alpha("o2")

        # Make a list of diagonal indices, ordered by the corresponding
        # diagonal values
        sidcs = None
        if block == "ph":
            diagonal = matrix.diagonal().ph.to_ndarray()

            # Build list of indices, which would sort the diagonal
            sidcs = np.dstack(
                np.unravel_index(np.argsort(diagonal.ravel()), diagonal.shape))
            assert sidcs.shape[0] == 1
            if spin_flip:
                sidcs = [
                    idx for idx in sidcs[0] if idx[0] < nCa and idx[1] >= nva
                ]
            else:
                sidcs = [
                    idx for idx in sidcs[0]
                    if any((idx[0] >= nCa and idx[1] >= nva,
                            idx[0] < nCa and idx[1] < nva))  # noqa: E221
                ]
        elif block == "pphh":
            diagonal = matrix.diagonal().pphh.to_ndarray()

            # Build list of indices, which would sort the diagonal
            sidcs = np.dstack(
                np.unravel_index(np.argsort(diagonal.ravel()), diagonal.shape))

            assert sidcs.shape[0] == 1
            if spin_flip:
                sidcs = [
                    idx for idx in sidcs[0] if any((
                        idx[0] < noa and idx[1] < nCa and idx[2] < nva
                        and idx[3] >= nva,  # noqa: E221,E501
                        idx[0] < noa and idx[1] < nCa and idx[2] >= nva
                        and idx[3] < nva,  # noqa: E221,E501
                        idx[0] < noa and idx[1] >= nCa and idx[2] >= nva
                        and idx[3] >= nva,  # noqa: E221,E501
                        idx[0] >= noa and idx[1] < nCa and idx[2] >= nva
                        and idx[3] >= nva))  # noqa: E221,E501
                ]
            else:
                sidcs = [
                    idx for idx in sidcs[0] if any((
                        idx[0] < noa and idx[1] < nCa and idx[2] < nva
                        and idx[3] < nva,  # noqa: E221,E501
                        idx[0] >= noa and idx[1] >= nCa and idx[2] >= nva
                        and idx[3] >= nva,  # noqa: E221,E501
                        idx[0] < noa and idx[1] >= nCa and idx[2] < nva
                        and idx[3] >= nva,  # noqa: E221,E501
                        idx[0] >= noa and idx[1] < nCa and idx[2] >= nva
                        and idx[3] < nva,  # noqa: E221,E501
                        idx[0] < noa and idx[1] >= nCa and idx[2] >= nva
                        and idx[3] < nva,  # noqa: E221,E501
                        idx[0] >= noa and idx[1] < nCa and idx[2] < nva
                        and idx[3] >= nva))  # noqa: E221,E501
                ]
            sidcs = [idx for idx in sidcs if idx[2] != idx[3]]
            if not matrix.is_core_valence_separated:
                sidcs = [idx for idx in sidcs if idx[0] != idx[1]]

        # Group the indices by corresponding diagonal value
        def grouping(x):
            return np.round(diagonal[tuple(x)], decimals=12)

        gidcs = [[tuple(gitem) for gitem in group]
                 for key, group in itertools.groupby(sidcs, grouping)]
        igroup = 0  # The current diagonal value group we are in
        for (i, guess) in enumerate(guesses):
            # Extract indices of non-zero elements
            nonzeros = np.dstack(np.where(guess[block].to_ndarray() != 0))
            assert nonzeros.shape[0] == 1
            nonzeros = [tuple(nzitem) for nzitem in nonzeros[0]]
            if i > 0 and igroup + 1 < len(gidcs):
                if nonzeros[0] in gidcs[igroup + 1]:
                    igroup += 1
            for nz in nonzeros:
                assert nz in gidcs[igroup]
Example #53
0
def groupby(iterable, keyfunc):
    return itertools.groupby(sorted(iterable, key=keyfunc), keyfunc)
Example #54
0
def reducer(lines):
    for key, values in it.groupby(lines, lambda t: t.rstrip()):
        print(key)
Example #55
0
def discover_catalog(mysql_conn, config):
    '''Returns a Catalog describing the structure of the database.'''

    filter_dbs_config = config.get('filter_dbs')

    if filter_dbs_config:
        filter_dbs_clause = ",".join(
            ["'{}'".format(db) for db in filter_dbs_config.split(",")])

        table_schema_clause = "WHERE table_schema IN ({})".format(
            filter_dbs_clause)
    else:
        table_schema_clause = """
        WHERE table_schema NOT IN (
        'information_schema',
        'performance_schema',
        'mysql',
        'sys'
        )"""

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            cur.execute("""
            SELECT table_schema,
                   table_name,
                   table_type,
                   table_rows
                FROM information_schema.tables
                {}
            """.format(table_schema_clause))

            table_info = {}

            for (db, table, table_type, rows) in cur.fetchall():
                if db not in table_info:
                    table_info[db] = {}

                table_info[db][table] = {
                    'row_count': rows,
                    'is_view': table_type == 'VIEW'
                }

            cur.execute("""
                SELECT table_schema,
                       table_name,
                       column_name,
                       data_type,
                       character_maximum_length,
                       numeric_precision,
                       numeric_scale,
                       column_type,
                       column_key
                    FROM information_schema.columns
                    {}
                    ORDER BY table_schema, table_name
            """.format(table_schema_clause))

            columns = []
            rec = cur.fetchone()
            while rec is not None:
                columns.append(Column(*rec))
                rec = cur.fetchone()

            entries = []
            for (k, cols) in itertools.groupby(
                    columns, lambda c: (c.table_schema, c.table_name)):
                cols = list(cols)
                (table_schema, table_name) = k
                schema = Schema(type='object',
                                properties={
                                    c.column_name: schema_for_column(c)
                                    for c in cols
                                })
                md = create_column_metadata(cols)
                md_map = metadata.to_map(md)

                md_map = metadata.write(md_map, (), 'database-name',
                                        table_schema)

                is_view = table_info[table_schema][table_name]['is_view']

                if table_schema in table_info and table_name in table_info[
                        table_schema]:
                    row_count = table_info[table_schema][table_name].get(
                        'row_count')

                    if row_count is not None:
                        md_map = metadata.write(md_map, (), 'row-count',
                                                row_count)

                    md_map = metadata.write(md_map, (), 'is-view', is_view)

                column_is_key_prop = lambda c, s: (
                    c.column_key == 'PRI' and s.properties[
                        c.column_name].inclusion != 'unsupported')

                key_properties = [
                    c.column_name for c in cols
                    if column_is_key_prop(c, schema)
                ]

                if not is_view:
                    md_map = metadata.write(md_map, (), 'table-key-properties',
                                            key_properties)

                entry = CatalogEntry(
                    table=table_name,
                    stream=table_name,
                    metadata=metadata.to_list(md_map),
                    tap_stream_id=common.generate_tap_stream_id(
                        table_schema, table_name),
                    schema=schema)

                entries.append(entry)

    return Catalog(entries)
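# discover_catalog() folds the flat, ORDER BY-sorted column rows into one entry
# per (table_schema, table_name). A toy illustration, with a hypothetical
# Column namedtuple standing in for the tap's real Column type:
from collections import namedtuple
from itertools import groupby

Column = namedtuple("Column", "table_schema table_name column_name data_type")

columns = [
    Column("shop", "orders", "id", "int"),
    Column("shop", "orders", "total", "decimal"),
    Column("shop", "users", "id", "int"),
]
for (schema, table), cols in groupby(
        columns, key=lambda c: (c.table_schema, c.table_name)):
    print(schema, table, [c.column_name for c in cols])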
Example #56
0
    def action_create_invoice(self):
        """Create the invoice associated to the PO.
        """
        precision = self.env['decimal.precision'].precision_get('Product Unit of Measure')

        # 1) Prepare invoice vals and clean-up the section lines
        invoice_vals_list = []
        for order in self:
            if order.invoice_status != 'to invoice':
                continue

            pending_section = None
            # Invoice values.
            invoice_vals = order._prepare_invoice()
            # Invoice line values (keep only necessary sections).
            for line in order.order_line:
                if line.display_type == 'line_section':
                    pending_section = line
                    continue
                if not float_is_zero(line.qty_to_invoice, precision_digits=precision):
                    if pending_section:
                        invoice_vals['invoice_line_ids'].append((0, 0, pending_section._prepare_account_move_line()))
                        pending_section = None
                    invoice_vals['invoice_line_ids'].append((0, 0, line._prepare_account_move_line()))
            invoice_vals_list.append(invoice_vals)

        if not invoice_vals_list:
            raise UserError(_('There is no invoiceable line. If a product has a control policy based on received quantity, please make sure that a quantity has been received.'))

        # 2) group by (company_id, partner_id, currency_id) for batch creation
        new_invoice_vals_list = []
        for grouping_keys, invoices in groupby(invoice_vals_list, key=lambda x: (x.get('company_id'), x.get('partner_id'), x.get('currency_id'))):
            origins = set()
            payment_refs = set()
            refs = set()
            ref_invoice_vals = None
            for invoice_vals in invoices:
                if not ref_invoice_vals:
                    ref_invoice_vals = invoice_vals
                else:
                    ref_invoice_vals['invoice_line_ids'] += invoice_vals['invoice_line_ids']
                origins.add(invoice_vals['invoice_origin'])
                payment_refs.add(invoice_vals['invoice_payment_ref'])
                refs.add(invoice_vals['ref'])
            ref_invoice_vals.update({
                'ref': ', '.join(refs)[:2000],
                'invoice_origin': ', '.join(origins),
                'invoice_payment_ref': len(payment_refs) == 1 and payment_refs.pop() or False,
            })
            new_invoice_vals_list.append(ref_invoice_vals)
        invoice_vals_list = new_invoice_vals_list

        # 3) Create invoices.
        moves = self.env['account.move']
        AccountMove = self.env['account.move'].with_context(default_move_type='in_invoice')
        for vals in invoice_vals_list:
            moves |= AccountMove.with_company(vals['company_id']).create(vals)

        # 4) Some moves might actually be refunds: convert them if the total amount is negative
        # We do this after the moves have been created since we need taxes, etc. to know if the total
        # is actually negative or not
        moves.filtered(lambda m: m.currency_id.round(m.amount_total) < 0).action_switch_invoice_into_refund_credit_note()

        return self.action_view_invoice(moves)
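# Step 2) above batches invoice value dicts that share the same
# (company_id, partner_id, currency_id). A minimal standalone sketch of that
# merge; here the list is sorted by the same key first so that plain
# itertools.groupby sees contiguous groups:
from itertools import groupby

def grouping_key(vals):
    return (vals["company_id"], vals["partner_id"], vals["currency_id"])

invoice_vals_list = [
    {"company_id": 1, "partner_id": 7, "currency_id": 2, "invoice_line_ids": ["a"]},
    {"company_id": 1, "partner_id": 7, "currency_id": 2, "invoice_line_ids": ["b"]},
    {"company_id": 1, "partner_id": 9, "currency_id": 2, "invoice_line_ids": ["c"]},
]
merged = []
for key, vals in groupby(sorted(invoice_vals_list, key=grouping_key),
                         key=grouping_key):
    vals = list(vals)
    base = vals[0]
    for other in vals[1:]:
        base["invoice_line_ids"] += other["invoice_line_ids"]
    merged.append(base)
print(merged)  # two batched invoices instead of three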
Example #57
0
def get_raw(entries, raw_data=False, med_and_avg=False, max_and_min=False):
    entries = sorted(list(entries), key=lambda x: x['case_name'])
    grouped = groupby(entries, key=lambda x: x['case_name'])
    result_list = []
    for case_name, group in grouped:
        json_data = [result for result in group]
        results = [result['result'] for result in json_data]
        unit = [result['unit'] for result in json_data][0]

        if med_and_avg:
            try:
                num_results = filter(is_number, results)
                results = [float(result) for result in num_results]
                results.sort()
                median = results[int(len(results) /
                                     2)] if len(results) % 2 else None
                avg = sum(results) / len(results) if median is None else None
                result = median if median else avg
            except (ZeroDivisionError, TypeError, ValueError):
                result = 'N/A'
                avg = 'N/A'
                median = 'N/A'
        else:
            median = None
            avg = None
        try:
            if max_and_min:
                max_val = max(results)
                min_val = min(results)
            else:
                max_val = None
                min_val = None
        except ValueError:
            max_val = None
            min_val = None

        vic_dict = {
            'full boot': {
                'stack_bar': ('report', 'csv', 'boot chart'),
            },
            'boot_analyze_2.0': {
                'stack_bar': ('report', 'csv', 'boot chart'),
            },
            'full_boot': {
                'stack_bar': ('report', 'csv', 'boot chart'),
            },
            'Acrn_boot_time': {
                'stack_bar': ('report', 'csv', 'boot chart'),
            },
            'instance_90': {
                'usage_chart': ('aic_instance', 'json', 'usage trend')
            },
            'game_90': {
                'usage_chart_by_instances': ('aic_game', 'json', 'usage trend')
            },
            'instance_ins': {
                'usage_chart_v2': ('aic_instance', 'json', 'usage trend')
            }
        }

        if raw_data:
            if case_name in vic_dict:
                vic = vic_dict[case_name]
            else:
                vic = None
            raw = json_data
        else:
            vic = None
            raw = None

        rg = ResultGroup(case_name=case_name,
                         unit=unit,
                         maximum=max_val,
                         minimum=min_val,
                         median=median,
                         average=avg,
                         raw=raw,
                         result=result,
                         fluc=None,
                         vic=vic)
        result_list.append(rg)

    return result_list
Example #58
0
 def __init__(self, parameters):
     self.parameters = {k: list(g) for k, g in itertools.groupby(parameters, key=lambda p: p['in'])}
Example #59
0
def autodiscover(path=None, plugin_prefix='intake_', do_package_scan=True):
    r"""Discover intake drivers.

    In order of decreasing precedence:

    - Respect the 'drivers' section of the intake configuration file.
    - Find 'intake.drivers' entrypoints provided by any Python packages in the
      environment.
    - Search all packages in the environment for names that begin with
      ``intake\_``. Import them and scan them for subclasses of
      ``intake.source.base.Plugin``. This was previously the *only* mechanism
      for auto-discoverying intake drivers, and it is maintained for backward
      compatibility. In a future release, intake will issue a warning if any
      packages are located by the method that do not also have entrypoints.

    Parameters
    ----------
    path : str or None
        Default is ``sys.path``.
    plugin_prefix : str
        DEPRECATED. Default is 'intake\_'.
    do_package_scan : boolean
        Default is True. In the future, the default will be changed to False,
        and the option may eventually be removed entirely.

    Returns
    -------
    drivers : dict
        Name mapped to driver class.
    """
    # Discover drivers via package scan.
    if do_package_scan:
        package_scan_results = _package_scan(path, plugin_prefix)
        if package_scan_results:
            warnings.warn(
                "The option `do_package_scan` may be removed in a future release.",
                PendingDeprecationWarning)
    else:
        package_scan_results = {}

    # Discover drivers via entrypoints.
    group = entrypoints.get_group_named('intake.drivers', path=path)
    group_all = entrypoints.get_group_all('intake.drivers', path=path)
    if len(group_all) != len(group):
        # There are some name collisions. Let's go digging for them.
        for name, matches in itertools.groupby(group_all, lambda ep: ep.name):
            matches = list(matches)
            if len(matches) != 1:
                winner = group[name]
                logger.debug(
                    "There are %d 'intake.driver' entrypoints for the name "
                    "%r. They are %r. The match %r has won the race.",
                    len(matches),
                    name,
                    matches,
                    winner)

    for name, entrypoint in group.items():
        logger.debug("Discovered entrypoint '%s = %s.%s'",
                     name,
                     entrypoint.module_name,
                     entrypoint.object_name)
        if name in package_scan_results:
            cls = package_scan_results[name]
            del package_scan_results[name]
            logger.debug("Entrypoint shadowed package_scan result '%s = %s.%s'",
                         name, cls.__module__, cls.__name__)

    # Discover drivers via config.
    drivers_conf = conf.get('drivers', {})
    logger.debug("Using configuration file at %s", cfile())
    for name, dotted_object_name in drivers_conf.items():
        if not dotted_object_name:
            logger.debug('Name %s is banned in config file', name)
            if name in group:
                entrypoint = group[name]
                del group[name]
                logger.debug("Disabled entrypoint '%s = %s.%s'",
                             entrypoint.name,
                             entrypoint.module_name,
                             entrypoint.object_name)
            if name in package_scan_results:
                cls = package_scan_results[name]
                del package_scan_results[name]
                logger.debug("Disabled package_scan result '%s = %s.%s'",
                             name, cls.__module__, cls.__name__)
            continue
        module_name, object_name = dotted_object_name.rsplit('.', 1)
        entrypoint = entrypoints.EntryPoint(name, module_name, object_name)
        logger.debug("Discovered config-specified '%s = %s.%s'",
                     entrypoint.name,
                     entrypoint.module_name,
                     entrypoint.object_name)
        if name in group:
            shadowed = group[name]
            logger.debug("Config shadowed entrypoint '%s = %s.%s'",
                         shadowed.name,
                         shadowed.module_name,
                         shadowed.object_name)
        if name in package_scan_results:
            cls = package_scan_results[name]
            del package_scan_results[name]
            logger.debug("Config shadowed package scan result '%s = %s.%s'",
                         name, cls.__module__, cls.__name__)
        group[name] = entrypoint

    # Discovery is complete.

    if package_scan_results:
        warnings.warn(
            f"The drivers {list(package_scan_results)} do not specify entry_"
            f"points and were only discovered via a package scan. This may "
            f"break in a future release of intake. The packages should be "
            f"updated.",
            FutureWarning)

    # Load entrypoints. Any that were shadowed or banned have already been
    # removed above.
    drivers = {}
    for entrypoint in group.values():
        try:
            drivers[entrypoint.name] = _load_entrypoint(entrypoint)
        except ConfigurationError:
            logger.exception(
                "Error while loading entrypoint %s",
                entrypoint.name)
            continue
        logger.debug("Loaded entrypoint '%s = %s.%s'",
                     entrypoint.name,
                     entrypoint.module_name,
                     entrypoint.object_name)

    # Now include any package scan results. Any that were shadowed or
    # banned have already been removed above.
    for name, cls in package_scan_results.items():
        drivers[name] = cls
        logger.debug("Loaded package scan result '%s = %s.%s'",
                     name,
                     cls.__module__,
                     cls.__name__)

    return drivers
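# The entrypoint collision check in autodiscover() groups all 'intake.drivers'
# entrypoints by name and reports duplicates. A toy illustration with
# hypothetical records in place of real entrypoints.EntryPoint objects:
from collections import namedtuple
from itertools import groupby

EP = namedtuple("EP", "name module_name object_name")

group_all = [
    EP("csv", "intake.source.csv", "CSVSource"),
    EP("csv", "some_plugin", "AltCSVSource"),
    EP("zarr", "intake_xarray", "ZarrSource"),
]
for name, matches in groupby(sorted(group_all, key=lambda ep: ep.name),
                             key=lambda ep: ep.name):
    matches = list(matches)
    if len(matches) > 1:
        print("name collision for %r, first match wins: %r" % (name, matches))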
Example #60
0
def report(
    url: str,
) -> typing.Union[context.Context, github.GithubInstallationClient, None]:
    path = url.replace("https://github.com/", "")

    pull_number: typing.Optional[str]
    repo: typing.Optional[str]

    try:
        owner, repo, _, pull_number = path.split("/")
    except ValueError:
        pull_number = None
        try:
            owner, repo = path.split("/")
        except ValueError:
            owner = path
            repo = None

    try:
        client = github.get_client(owner)
    except exceptions.MergifyNotInstalled:
        print(f"* Mergify is not installed on account {owner}")
        return None

    # Do a dumb request just to authenticate
    client.get("/")

    if client.auth.installation is None:
        print("No installation detected")
        return None

    print("* INSTALLATION ID: %s" % client.auth.installation["id"])

    cached_sub, db_sub = utils.async_run(
        subscription.Subscription.get_subscription(client.auth.owner_id),
        subscription.Subscription._retrieve_subscription_from_db(
            client.auth.owner_id),
    )

    if repo is None:
        slug = None
    else:
        slug = owner + "/" + repo

    print("* SUBSCRIBED (cache/db): %s / %s" %
          (cached_sub.active, db_sub.active))
    print("* Features (cache):")
    for f in cached_sub.features:
        print(f"  - {f.value}")
    report_sub(client.auth.installation["id"], cached_sub, "ENGINE-CACHE",
               slug)
    report_sub(client.auth.installation["id"], db_sub, "DASHBOARD", slug)

    utils.async_run(report_worker_status(client.auth.owner))

    if repo is not None:

        repo_info = client.item(f"/repos/{owner}/{repo}")
        print(
            f"* REPOSITORY IS {'PRIVATE' if repo_info['private'] else 'PUBLIC'}"
        )

        print("* CONFIGURATION:")
        mergify_config = None
        try:
            filename, mergify_config_content = rules.get_mergify_config_content(
                client, repo)
        except rules.NoRules:  # pragma: no cover
            print(".mergify.yml is missing")
        else:
            print(f"Config filename: {filename}")
            print(mergify_config_content.decode())
            try:
                mergify_config = rules.UserConfigurationSchema(
                    mergify_config_content)
            except rules.InvalidRules as e:  # pragma: no cover
                print("configuration is invalid %s" % str(e))
            else:
                mergify_config["pull_request_rules"].rules.extend(
                    engine.DEFAULT_PULL_REQUEST_RULES.rules)

        if pull_number is None:
            for branch in client.items(f"/repos/{owner}/{repo}/branches"):
                q = queue.Queue(
                    utils.get_redis_for_cache(),
                    repo_info["owner"]["id"],
                    repo_info["owner"]["login"],
                    repo_info["id"],
                    repo_info["name"],
                    branch["name"],
                )
                pulls = q.get_pulls()
                if not pulls:
                    continue

                print(f"* QUEUES {branch['name']}:")

                for priority, grouped_pulls in itertools.groupby(
                        pulls, key=lambda v: q.get_config(v)["priority"]):
                    try:
                        fancy_priority = merge.PriorityAliases(priority).name
                    except ValueError:
                        fancy_priority = priority
                    formatted_pulls = ", ".join(
                        (f"#{p}" for p in grouped_pulls))
                    print(f"** {formatted_pulls} (priority: {fancy_priority})")
        else:
            pull_raw = client.item(
                f"/repos/{owner}/{repo}/pulls/{pull_number}")
            ctxt = context.Context(
                client,
                pull_raw,
                cached_sub,
                [{
                    "event_type": "mergify-debugger",
                    "data": {}
                }],
            )

            # FIXME queues could also be printed if no pull number given
            q = queue.Queue.from_context(ctxt)
            print("* QUEUES: %s" % ", ".join([f"#{p}" for p in q.get_pulls()]))
            print("* PULL REQUEST:")
            pr_data = dict(ctxt.pull_request.items())
            pprint.pprint(pr_data, width=160)

            print("is_behind: %s" % ctxt.is_behind)

            print("mergeable_state: %s" % ctxt.pull["mergeable_state"])

            print("* MERGIFY LAST CHECKS:")
            for c in ctxt.pull_engine_check_runs:
                print("[%s]: %s | %s" %
                      (c["name"], c["conclusion"], c["output"].get("title")))
                print("> " +
                      "\n> ".join(c["output"].get("summary").split("\n")))

            if mergify_config is not None:
                print("* MERGIFY LIVE MATCHES:")
                match = mergify_config[
                    "pull_request_rules"].get_pull_request_rule(ctxt)
                summary_title, summary = actions_runner.gen_summary(
                    ctxt, match)
                print("> %s" % summary_title)
                print(summary)

            return ctxt

    return client