def most_common(self, n=None):
    '''List the n most common elements and their counts from the most
    common to the least.  If n is None, then list all element counts.
    '''
    if n is None:
        return sorted(self.iteritems(), key=itemgetter(1), reverse=True)
    return nlargest(n, self.iteritems(), key=itemgetter(1))
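# Usage sketch (illustrative, not part of the class above): the standard
# library's collections.Counter provides the same most_common() behaviour
# that this method re-implements with nlargest()/sorted().
from collections import Counter

counts = Counter("banana")
assert counts.most_common(2) == [('a', 3), ('n', 2)]
assert counts.most_common() == [('a', 3), ('n', 2), ('b', 1)]  # n=None lists everything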
def plotPerStreamAboveFirstAndPrepareStack(points, allStackTimes, ax, stream, height,
                                           streamHeightCut, doPlot, addToStackTimes,
                                           color, threadOffset):
    points = sorted(points, key=attrgetter('x'))
    points = reduceSortedPoints(points)
    streamHeight = 0
    preparedTimes = []
    for t1, t2 in zip(points, points[1:]):
        streamHeight += t1.y
        # We make a cut here when plotting because the first row for
        # each stream was already plotted previously and we do not
        # need to plot it again. And also we want to count things
        # properly in allStackTimes. We want to avoid double counting
        # or missing running modules and this is complicated because
        # we counted the modules in the first row already.
        if streamHeight < streamHeightCut:
            continue
        preparedTimes.append((t1.x, t2.x - t1.x, streamHeight))
    preparedTimes.sort(key=itemgetter(2))
    preparedTimes = mergeContiguousBlocks(preparedTimes)

    for nthreads, ts in groupby(preparedTimes, itemgetter(2)):
        theTS = [(t[0], t[1]) for t in ts]
        if doPlot:
            theTimes = [(t[0] / 1000., t[1] / 1000.) for t in theTS]
            yspan = (stream - 0.4 + height, height * (nthreads - 1))
            ax.broken_barh(theTimes, yspan, facecolors=color, edgecolors=color, linewidth=0)
        if addToStackTimes:
            allStackTimes[color].extend(theTS * (nthreads - threadOffset))
def piechart(self, rc):
    user_count = {}
    for (user_name, req_id) in getDistinct(Log, 'user_name', 'req_id'):
        if user_name == None:
            continue
        if not user_count.has_key(user_name):
            user_count[user_name] = 0
        user_count[user_name] += 1

    kw = {}
    kw['data'] = [('user', 'number of requests')]
    kw['title'] = 'Log entries by user'
    for (k, v) in sorted(user_count.items(), key=itemgetter(1), reverse=True):
        kw['data'].append((k, v))
    rc.ctx['by_user'] = pieChart(rc, 'by_user', **kw)

    view_count = {}
    for (vc, vn, ri) in getDistinct(Log, 'view_class', 'view_name', 'req_id'):
        if vc == None or vn == None or ri == None:
            continue
        key = "%s:%s" % (vc, vn)
        if not view_count.has_key(key):
            view_count[key] = 0
        view_count[key] += 1

    kw = {}
    kw["data"] = [('view_name', 'request count')]
    kw['title'] = 'Log entries by view_name'
    for (k, v) in sorted(view_count.items(), key=itemgetter(1), reverse=True):
        kw['data'].append((k, v))
    rc.ctx['by_view'] = pieChart(rc, 'by_view', **kw)

    return self.render(rc)
def test_bulk_export(self):
    # Clear out some context vars, to properly simulate how this is run from the export task
    # Besides, core functionality shouldn't need the c context vars
    c.app = c.project = None

    f = tempfile.TemporaryFile()
    self.tracker.bulk_export(f)
    f.seek(0)
    tracker = json.loads(f.read())

    tickets = sorted(tracker['tickets'], key=operator.itemgetter('summary'))
    assert_equal(len(tickets), 2)
    ticket_foo = tickets[1]
    assert_equal(ticket_foo['summary'], 'foo')
    assert_equal(ticket_foo['custom_fields']['_milestone'], '1.0')
    posts_foo = ticket_foo['discussion_thread']['posts']
    assert_equal(len(posts_foo), 1)
    assert_equal(posts_foo[0]['text'], 'silly comment')

    tracker_config = tracker['tracker_config']
    assert_true('options' in tracker_config.keys())
    assert_equal(tracker_config['options']['mount_point'], 'bugs')

    milestones = sorted(tracker['milestones'], key=operator.itemgetter('name'))
    assert_equal(milestones[0]['name'], '1.0')
    assert_equal(milestones[1]['name'], '2.0')

    saved_bins_summaries = [bin['summary'] for bin in tracker['saved_bins']]
    assert_true('Closed Tickets' in saved_bins_summaries)
def sort_stats(self, sortedby=None):
    """Return the stats sorted by sortedby variable."""
    if sortedby is None:
        # No need to sort...
        return self.stats

    tree = glances_processes.is_tree_enabled()

    if sortedby == 'io_counters' and not tree:
        # Specific case for io_counters
        # Sum of io_r + io_w
        try:
            # Sort process by IO rate (sum IO read + IO write)
            self.stats.sort(key=lambda process: process[sortedby][0] -
                            process[sortedby][2] +
                            process[sortedby][1] -
                            process[sortedby][3],
                            reverse=glances_processes.sort_reverse)
        except Exception:
            self.stats.sort(key=operator.itemgetter('cpu_percent'),
                            reverse=glances_processes.sort_reverse)
    else:
        # Others sorts
        if tree:
            self.stats.set_sorting(sortedby, glances_processes.sort_reverse)
        else:
            try:
                self.stats.sort(key=operator.itemgetter(sortedby),
                                reverse=glances_processes.sort_reverse)
            except (KeyError, TypeError):
                self.stats.sort(key=operator.itemgetter('name'),
                                reverse=False)

    return self.stats
def publish_samples(self, context, samples):
    """Publish samples on RPC.

    :param context: Execution context from the service or RPC call.
    :param samples: Samples from pipeline after transformation.
    """
    meters = [
        utils.meter_message_from_counter(
            sample, cfg.CONF.publisher.telemetry_secret)
        for sample in samples
    ]

    topic = cfg.CONF.publisher_rpc.metering_topic
    self.local_queue.append((context, topic, meters))

    if self.per_meter_topic:
        for meter_name, meter_list in itertools.groupby(
                sorted(meters, key=operator.itemgetter('counter_name')),
                operator.itemgetter('counter_name')):
            meter_list = list(meter_list)
            topic_name = topic + '.' + meter_name
            LOG.debug('Publishing %(m)d samples on %(n)s',
                      {'m': len(meter_list), 'n': topic_name})
            self.local_queue.append((context, topic_name, meter_list))

    self.flush()
def main():
    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
    print """<p>Last updated: %s\n\n</p>""" % now

    for project in sys.argv[1:]:
        #todo = get_tasks_for_project(project, "status:pending due.before:60days")
        todo = get_tasks_for_project(project, "status:pending")
        try:
            tmp = sorted(todo, key=itemgetter('due'), reverse=False)
            todo = tmp
        except:
            pass

        done = get_tasks_for_project(project, "status:completed")
        done = sorted(done, key=itemgetter('end'), reverse=True)

        if 0 == len(todo) + len(done):
            continue

        if len(todo) > TOTAL_TASKS_TO_SHOW:
            todo = todo[0:TOTAL_TASKS_TO_SHOW]
            done = []
        if len(todo) + len(done) > TOTAL_TASKS_TO_SHOW:
            done = done[0:TOTAL_TASKS_TO_SHOW - len(todo)]

        print """<dl class="project-box sticky">"""
        print """<dt class="project-name">%s</dt>""" % project
        print("<dd><ul>")
        print_html_descriptions(todo)
        print_html_descriptions(done)
        print("</ul></dd>")
        print
        print """</dl>\n\n"""
def __try_save_next_bunch_of_file_states(self):
    """Check if we have multiple states to store to the DB, and do it."""
    assert not in_main_thread()

    with self.__file_states_ready_to_write_lock:
        all_states = self.__file_states_ready_to_write.values()
        self.__file_states_ready_to_write = {}

    # "states" contains tuples like (base_dir_id, state).
    # Group them by base_dir_id, and write multiple file states at once.
    if all_states:
        logger.debug('Writing %i file state(s) at once', len(all_states))

        grouped_by_base_dir = sorted_groupby(all_states, key=itemgetter(0))
        for base_dir_id, per_base_dir in grouped_by_base_dir:
            states_to_write = imap(itemgetter(1), per_base_dir)

            logger.debug('Writing states for base dir %r', base_dir_id)
            with db.RDB() as rdbw:
                HostQueries.HostFiles.add_file_states(
                    base_dir_id, states_to_write, rdbw)
        logger.debug('Wrote the states')
def getIndex(self, peerMACaddr=None, localLinkId=None):
    '''
    Given the peer MAC address (or, alternatively, the local link id),
    return the index in the list of peers.
    '''
    if peerMACaddr:
        return map(itemgetter('peerMac'), self.my_list).index(peerMACaddr)
    if localLinkId:
        return map(itemgetter('localLinkId'), self.my_list).index(localLinkId)
def folder_stats(self, folder):
    if path.isdir(folder):
        for file in listdir(folder):
            self.folder_stats(folder + "\\" + file)
    else:
        file_words, file_letters = self.file_stats(folder)
        sorted_words = sorted(file_words.iteritems(),
                              key=operator.itemgetter(1), reverse=True)
        sorted_letters = sorted(file_letters.iteritems(),
                                key=operator.itemgetter(1), reverse=True)

        self.files_stats += "\nFailas: "  # Lithuanian: "File: "
        self.files_stats += folder
        self.files_stats += "\nPasikartojantys žodžiai " \
                            "(pagal dažnumą):\n"  # "Repeated words (by frequency):"
        self.files_stats += str(sorted_words)
        self.files_stats += "\nPasikartojantys simboliai " \
                            "(pagal dažnumą):\n"  # "Repeated characters (by frequency):"
        self.files_stats += str(sorted_letters)

        for word in file_words:
            if word not in self.words.keys():
                self.words[word] = 0
            self.words[word] += file_words[word]
        for letter in file_letters:
            if letter not in self.letters.keys():
                self.letters[letter] = 0
            self.letters[letter] += file_letters[letter]
def filterRepeatPair(matchPair):
    newMatchPair = []
    # Format : [[3, 5, 2486, 2532, 2486, 'Read48_d'] ]
    inNoList = []
    outNoList = []

    matchPair.sort(key=itemgetter(0))
    for key, items in groupby(matchPair, itemgetter(0)):
        ct = 0
        anotherSideList = []
        for eachitem in items:
            ct = ct + 1
            anotherSideList.append(eachitem[1])
        if len(set(anotherSideList)) > 1:
            inNoList.append(key)

    matchPair.sort(key=itemgetter(1))
    for key, items in groupby(matchPair, itemgetter(1)):
        ct = 0
        anotherSideList = []
        for eachitem in items:
            ct = ct + 1
            anotherSideList.append(eachitem[0])
        if len(set(anotherSideList)) > 1:
            outNoList.append(key)

    for eachitem in matchPair:
        if not eachitem[0] in inNoList and not eachitem[1] in outNoList:
            newMatchPair.append(eachitem)

    return newMatchPair
def _asset_difference_search(self, type, args):
    if not args:
        return []
    having_values = tuple(map(itemgetter(2), args))
    where = ' AND '.join(
        map(lambda x: '(SUM(bal2) %(operator)s %%s)' % {'operator': x[1]}, args))
    query = self.env['account.move.line']._query_get()
    self._cr.execute(('SELECT pid AS partner_id, SUM(bal2) FROM '
                      '(SELECT CASE WHEN bal IS NOT NULL THEN bal '
                      'ELSE 0.0 END AS bal2, p.id as pid FROM '
                      '(SELECT (debit-credit) AS bal, partner_id '
                      'FROM account_move_line l '
                      'WHERE account_id IN '
                      '(SELECT id FROM account_account '
                      'WHERE type=%s AND active) '
                      'AND reconciled IS FALSE '
                      'AND ' + query + ') AS l '
                      'RIGHT JOIN res_partner p '
                      'ON p.id = partner_id ) AS pl '
                      'GROUP BY pid HAVING ' + where),
                     (type,) + having_values)
    res = self._cr.fetchall()
    if not res:
        return [('id', '=', '0')]
    return [('id', 'in', map(itemgetter(0), res))]
def test_extractor(self):
    """Reads a review of Alice in Wonderland and extracts the most
    frequent nouns found in the text as well as the most frequent
    'noun phrases'.
    """
    text = readData("alicereview.txt")
    extractor = getUtility(ITermExtractor)
    (simple_terms, np_terms) = extractor.extract(text)

    important_terms = sorted(simple_terms.items(), key=itemgetter(1), reverse=True)[:10]
    self.failUnless(
        important_terms == [
            ("alice", 80),
            ("queen", 19),
            ("rabbit", 15),
            ("hatter", 13),
            ("door", 13),
            ("cat", 13),
            ("chapter", 12),
            ("king", 12),
            ("turtle", 11),
            ("duchess", 11),
        ]
    )

    important_np_terms = sorted(np_terms.items(), key=itemgetter(1), reverse=True)[:10]
    self.failUnless(
        important_np_terms == [
            ("white rabbit", 8),
            ("mock turtle", 8),
            ("cheshire cat", 5),
            ("march hare", 4),
            ("mad hatter", 3),
        ]
    )
def change_password(self, fields):
    new_password = operator.itemgetter('new_password')(
        dict(map(operator.itemgetter('name', 'value'), fields))
    )
    user_id = request.env.user
    user_id.check_password(new_password)
    return super(PasswordSecuritySession, self).change_password(fields)
def GetGoAnnotation(seqids):
    db = pymysql.connect(host="mysql-amigo.ebi.ac.uk",
                         user="******",
                         passwd="amigo",
                         db="go_latest",
                         port=4085)
    cur = db.cursor()
    cur.execute("""
        SELECT term.name, term.acc, term.term_type
        FROM gene_product
        INNER JOIN dbxref ON (gene_product.dbxref_id=dbxref.id)
        INNER JOIN species ON (gene_product.species_id=species.id)
        INNER JOIN association ON (gene_product.id=association.gene_product_id)
        INNER JOIN evidence ON (association.id=evidence.association_id)
        INNER JOIN term ON (association.term_id=term.id)
        WHERE dbxref.xref_key = %s;
        """, seqids)
    List = list()
    GO = list()
    f = cur.fetchall()
    for i in f:
        List.append(i[0] + ":" + i[2])
        GO.append(i[1])
    List = list(map(itemgetter(0), groupby(List)))
    GO = list(map(itemgetter(0), groupby(GO)))
    db.close()
    return [seqids, List, GO]
def action(args):
    genes = GenomeIntervalTree.from_table(args.refgene, parser=UCSCTable.REF_GENE, mode='tx')
    # read in only the columns we care about, because real data can be too large sometimes
    headers = ['#Chr1', 'Pos1', 'Chr2', 'Pos2', 'Type', 'Size', 'num_Reads']
    reader = pandas.read_csv(args.bd_file, comment='#', delimiter='\t', header=None,
                             usecols=[0, 1, 3, 4, 6, 7, 9], names=headers)
    # Convert to a dictionary for processing clearly
    rows = reader.T.to_dict().values()
    output = []
    for row in rows:
        # each segment is assigned to a gene or exon if either the
        try:
            chr1 = 'chr' + str(chromosomes[row['#Chr1']])
            chr2 = 'chr' + str(chromosomes[row['Chr2']])
        except KeyError:
            print('chrm not being processed: {} or {}'.format(row['#Chr1'], row['Chr2']))
            continue
        row['Event_1'], row['Gene_1'] = set_gene_event(row['Pos1'], chr1, genes)
        row['Event_2'], row['Gene_2'] = set_gene_event(row['Pos2'], chr2, genes)
        # discard those between -101 and 101
        if int(row['Size']) not in range(-101, 101):
            if row['Type'] == 'CTX':
                row['Size'] = 'N/A'
            output.append(row)

    output.sort(key=itemgetter('Event_1'))
    output.sort(key=itemgetter('num_Reads'), reverse=True)

    fieldnames = ['Event_1', 'Event_2', 'Type', 'Size', 'Gene_1', 'Gene_2', 'num_Reads']
    writer = csv.DictWriter(args.outfile, extrasaction='ignore',
                            fieldnames=fieldnames, delimiter='\t')
    writer.writeheader()
    writer.writerows(output)
def main():
    binary_label = True
    exclude_stopwords = True
    data_nosw, data_positive_nosw, data_negative_nosw = (
        scan.scan('finemedium.txt', exclude_stopwords, binary_label))

    data = []
    for datum in data_nosw:
        new_datum = datum[0].split()
        new_datum.append(datum[1])
        data.append(new_datum)
    #print data

    positive_review_nosw = ' '.join([row[0] for row in data_positive_nosw])
    dict_positive_nosw = utils.get_unigram(positive_review_nosw)[0]
    positive_words = [x[0] for x in sorted(dict_positive_nosw.items(),
                                           key=operator.itemgetter(1),
                                           reverse=True)[1:501]]

    negative_review_nosw = ' '.join([row[0] for row in data_negative_nosw])
    dict_negative_nosw = utils.get_unigram(negative_review_nosw)[0]
    negative_words = [x[0] for x in sorted(dict_negative_nosw.items(),
                                           key=operator.itemgetter(1),
                                           reverse=True)[1:501]]

    all_words = positive_words
    all_words.extend(x for x in negative_words if x not in positive_words)

    length = len(data)
    train_data = data[:int(length * .8)]
    test_data = data[int(length * .8):]

    decision_tree = dt.train(train_data, all_words)
    test_results = dt.test(decision_tree, test_data)
    print test_results
def termFrequency(term, senStats, flag):
    # This function takes in a term and a sentence's term statistics and
    # computes the term's frequency according to this formula:
    #     tf(t,s) = freq(t,s) / MAXarg(freq(x,s))
    # The flag indicates whether the sentence is the title; if so, the
    # frequency is computed with this formula instead:
    #     tf(t,s) = 0.5 + (0.5 * freq(t,s) / MAXarg(freq(x,s)))
    if flag:
        freqTS = 0.5 * float(senStats[term])
        # find the term that has the greatest frequency
        maxARG = float(max(senStats.iteritems(), key=operator.itemgetter(1))[1])
        tf = 0.5 + (freqTS / maxARG)
        #print "termFrequency( ", term, " ) = ", freqTS, " / ", maxARG, " = ", tf
    else:
        freqTS = float(senStats[term])
        # find the term that has the greatest frequency
        maxARG = float(max(senStats.iteritems(), key=operator.itemgetter(1))[1])
        tf = (freqTS / maxARG)
        #print "termFrequency( ", term, " ) = ", freqTS, " / ", maxARG, " = ", tf
    return tf
def show_seq(clus_obj, index):
    """Get the precursor and map sequences to it.
    This way we create a positional map."""
    current = clus_obj.clus
    clus_seqt = clus_obj.seq
    clus_locit = clus_obj.loci
    itern = 0
    for idc in current.keys():
        itern += 1
        timestamp = str(idc)
        seqListTemp = ()
        f = open("/tmp/" + timestamp + ".fa", "w")
        for idl in current[idc].loci2seq.keys():
            seqListTemp = list(set(seqListTemp).union(current[idc].loci2seq[idl]))
        maxscore = 0
        for s in seqListTemp:
            score = calculate_size(clus_seqt[s].freq)
            maxscore = max(maxscore, score)
            clus_seqt[s].score = score
            seq = clus_seqt[s]
            f.write(">" + s + "\n" + seq.seq + "\n")
        f.close()
        locilen_sorted = sorted(current[idc].locilen.iteritems(),
                                key=operator.itemgetter(1), reverse=True)
        lmax = clus_locit[locilen_sorted[0][0]]
        f = open("/tmp/" + timestamp + ".bed", "w")
        f.write("%s\t%s\t%s\t.\t.\t%s\n" % (lmax.chr, lmax.start, lmax.end, lmax.strand))
        f.close()
        os.system("bedtools getfasta -s -fi " + index + " -bed /tmp/" + timestamp +
                  ".bed -fo /tmp/" + timestamp + ".pre.fa")
        os.system("bowtie2-build /tmp/" + timestamp + ".pre.fa /tmp/" + timestamp +
                  ".pre.ind >/dev/null 2>&1")
        os.system("bowtie2 --rdg 7,3 --mp 4 --end-to-end --no-head --no-sq -D 20 -R 3 "
                  "-N 0 -i S,1,0.8 -L 3 -f /tmp/" + timestamp + ".pre.ind /tmp/" +
                  timestamp + ".fa -S /tmp/" + timestamp + ".map >>bowtie.log 2>&1")
        f = open("/tmp/" + timestamp + ".map", "r")
        seqpos = {}
        minv = 10000000
        for line in f:
            line = line.strip()
            cols = line.split("\t")
            seqpos[cols[0]] = int(cols[3])
            if minv > int(cols[3]):
                minv = int(cols[3])
        f.close()
        seqpos_sorted = sorted(seqpos.iteritems(), key=operator.itemgetter(1), reverse=False)
        showseq = ""
        showseq_plain = ""
        for (s, pos) in seqpos_sorted:
            ratio = (clus_seqt[s].score * 1.0 / maxscore * 100.0)
            realScore = (math.log(ratio, 2) * 2)
            if realScore < 0:
                realScore = 0
            # "score %s max %s ratio %s real %.0f" % (clus_seqt[s].score, maxscore, ratio, realScore)
            ## calculate the mean expression of the sequence and change size letter
            showseq_plain += "<br>%s<a style = \"font-size:%.0fpx;\"href = javascript:loadSeq(\"%s\")>%s</a>" % \
                ("".join("." for i in range(pos - 1)), realScore + 10, s, clus_seqt[s].seq)
            #showseq += seqviz.addseq(pos-1, clus_seqt[s].len, clus_seqt[s].seq)
        #current[idc].showseq = showseq
        current[idc].showseq_plain = showseq_plain
        os.system("rm /tmp/" + timestamp + "*")
    clus_obj.clus = current
    clus_obj.seq = clus_seqt
    return clus_obj
def nsmallest(n, iterable, key=None):
    """Find the n smallest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key)[:n]
    """
    # Short-cut for n==1 is to use min() when len(iterable)>0
    if n == 1:
        it = iter(iterable)
        head = list(islice(it, 1))
        if not head:
            return []
        if key is None:
            return [min(chain(head, it))]
        return [min(chain(head, it), key=key)]

    # When n>=size, it's faster to use sorted()
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        pass
    else:
        if n >= size:
            return sorted(iterable, key=key)[:n]

    # When key is none, use simpler decoration
    if key is None:
        it = izip(iterable, count())                        # decorate
        result = _nsmallest(n, it)
        return map(itemgetter(0), result)                   # undecorate

    # General case, slowest method
    in1, in2 = tee(iterable)
    it = izip(imap(key, in1), count(), in2)                 # decorate
    result = _nsmallest(n, it)
    return map(itemgetter(2), result)                       # undecorate
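# Usage sketch (illustrative): the standard library's heapq.nsmallest, which
# this backport mirrors, applies the same decorate/undecorate idea.
import heapq

prices = [('b', 7), ('a', 3), ('c', 5)]
assert heapq.nsmallest(2, prices, key=lambda p: p[1]) == [('a', 3), ('c', 5)]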
def getTagCloudHtml(self, numWords=100, filterFunc=None):
    tagHtmlStr = ''
    tagDict = self.tagDict
    if (filterFunc != None):
        tagDict = self.filterWords(filterFunc)

    if (len(tagDict) > 0):
        #first get sorted wordlist (reverse sorted by frequency)
        tagWordList = sorted(tagDict.items(), key=operator.itemgetter(1), reverse=True)
        totalTagWords = len(tagWordList)
        #now extract top 'numWords' from the list and then sort it with alphabetical order.
        #comparison should be case-insensitive
        tagWordList = sorted(tagWordList[0:numWords], key=operator.itemgetter(0),
                             cmp=lambda x, y: cmp(x.lower(), y.lower()))
        minFreq = min(tagWordList, key=operator.itemgetter(1))[1]
        self.minFreqLog = math.log(minFreq)
        maxFreq = max(tagWordList, key=operator.itemgetter(1))[1]
        self.maxFreqLog = math.log(maxFreq)
        difflog = self.maxFreqLog - self.minFreqLog
        #if the minfreqlog and maxfreqlog are nearly same then make sure that difference is
        #at least 0.001 to avoid division by zero errors later.
        assert(difflog >= 0.0)
        if (difflog < 0.001):
            self.maxFreqLog = self.minFreqLog + 0.001
        #change minFreqLog in such a way smallest log(freq)-minFreqLog is greater than 0
        self.minFreqLog = self.minFreqLog - ((self.maxFreqLog - self.minFreqLog) / self.fontsizevariation)
        #change the font size between "-2" to "+8" relative to current font size
        tagHtmlStr = ' '.join([('<font size="%+d" class="tagword">%s(%d)</font>\n' %
                                (self.__getTagFontSize(freq), x, freq))
                               for x, freq in tagWordList])
    return(tagHtmlStr)
def execute(self, quals, columns, sortkeys=None):
    sortkeys = sortkeys or []
    log_to_postgres(str(sorted(quals)))
    log_to_postgres(str(sorted(columns)))
    if (len(sortkeys)) > 0:
        log_to_postgres("requested sort(s): ")
        for k in sortkeys:
            log_to_postgres(k)
    if self.test_type == 'None':
        return None
    elif self.test_type == 'iter_none':
        return [None, None]
    else:
        if (len(sortkeys) > 0):
            # testfdw don't have tables with more than 2 fields, without
            # duplicates, so we only need to worry about sorting on 1st
            # asked column
            k = sortkeys[0]
            res = self._as_generator(quals, columns)
            if (self.test_type == 'sequence'):
                return sorted(res, key=itemgetter(k.attnum - 1),
                              reverse=k.is_reversed)
            else:
                return sorted(res, key=itemgetter(k.attname),
                              reverse=k.is_reversed)
        return self._as_generator(quals, columns)
def chartProperties(counter, path):
    seen_properties = sorted(counter, key=lambda x: x[1], reverse=True)
    seen_values_pct = map(itemgetter(1), tupleCounts2Percents(seen_properties))
    seen_values_pct = ['{:.1%}'.format(item) for item in seen_values_pct]

    plt.figure()
    numberchart = plt.bar(range(len(seen_properties)),
                          map(itemgetter(1), seen_properties),
                          width=0.9, alpha=0.6)
    plt.xticks(range(len(seen_properties)),
               map(itemgetter(0), seen_properties),
               rotation=90, ha='left')
    plt.ylabel('Occurrences')

    plot_margin = 1.15
    x0, x1, y0, y1 = plt.axis()
    plt.axis((x0, x1, y0, y1 * plot_margin))
    plt.tick_params(axis='both', which='major', labelsize=8)
    plt.tick_params(axis='both', which='minor', labelsize=8)
    plt.tight_layout()

    autolabel(numberchart, seen_values_pct)
    plt.savefig(path)
    plt.clf()
def valueChartList(inputlist, path):
    seen_values = Counter()
    for dict in inputlist:
        seen_values += Counter(dict['location-value-pair'].values())
    seen_values = seen_values.most_common()[:25]
    seen_values_pct = map(itemgetter(1), tupleCounts2Percents(seen_values))
    seen_values_pct = ['{:.1%}'.format(item) for item in seen_values_pct]

    plt.figure()
    numberchart = plt.bar(range(len(seen_values)),
                          map(itemgetter(1), seen_values),
                          width=0.9, alpha=0.6)
    plt.xticks(range(len(seen_values)),
               map(itemgetter(0), seen_values),
               ha='left')
    plt.ylabel('Occurrences')

    plot_margin = 1.15
    x0, x1, y0, y1 = plt.axis()
    plt.axis((x0, x1, y0, y1 * plot_margin))
    plt.tick_params(axis='both', which='major', labelsize=8)
    plt.tick_params(axis='both', which='minor', labelsize=8)
    plt.tight_layout()

    autolabel(numberchart, seen_values_pct)
    plt.savefig(path)
    plt.clf()
def model_fields(model, only=None, exclude=None, field_args=None, converter=None):
    """
    Generate a dictionary of fields for a given mongoengine Document.

    See `model_form` docstring for description of parameters.
    """
    from mongoengine.base import BaseDocument, DocumentMetaclass
    if not isinstance(model, (BaseDocument, DocumentMetaclass)):
        raise TypeError('model must be a mongoengine Document schema')

    converter = converter or ModelConverter()
    field_args = field_args or {}

    names = ((k, v.creation_counter) for k, v in model._fields.iteritems())
    field_names = map(itemgetter(0), sorted(names, key=itemgetter(1)))

    if only:
        field_names = (x for x in field_names if x in only)
    elif exclude:
        field_names = (x for x in field_names if x not in exclude)

    field_dict = {}
    for name in field_names:
        model_field = model._fields[name]
        field = converter.convert(model, model_field, field_args.get(name))
        if field is not None:
            field_dict[name] = field

    return field_dict
def test_agrupar_datos_de_lista_de_diccionarios(self):
    rows = [
        {'producto': 'Manzanas', 'fecha': '07/01/2012'},
        {'producto': 'Manzanas', 'fecha': '07/04/2012'},
        {'producto': 'Peras', 'fecha': '07/02/2012'},
        {'producto': 'Manzanas', 'fecha': '07/03/2012'},
        {'producto': 'Sandias', 'fecha': '07/02/2012'},
        {'producto': 'Melones', 'fecha': '07/02/2012'},
        {'producto': 'Zanahorias', 'fecha': '07/01/2012'},
        {'producto': 'Melones', 'fecha': '07/04/2012'},
    ]

    from operator import itemgetter
    from itertools import groupby

    # Sort the list first, because groupby only groups elements
    # that are already adjacent.
    rows.sort(key=itemgetter('fecha'))
    for fecha, items in groupby(rows, key=itemgetter('fecha')):
        print(fecha)
        for i in items:
            print(' ', i)

    # If memory is not an issue, the following approach is faster:
    from collections import defaultdict
    rows_by_date = defaultdict(list)
    for row in rows:
        rows_by_date[row['fecha']].append(row)

    # This way we can get the rows grouped by a given date
    for r in rows_by_date['07/01/2012']:
        print(r)
def mapping(probs):
    """use Huffman coding to map all the characters to a binary string"""
    tree = probs.items()
    tree = sorted(tree, key=itemgetter(1))
    while len(tree) > 1:
        node = ([tree[0][0], tree[1][0]], tree[0][1] + tree[1][1])
        tree = tree[2:]
        tree.append(node)
        tree = sorted(tree, key=itemgetter(1))

    #for storing the mapping
    mapping = {}
    #for traversing the tree
    queue = []
    #two children nodes of the root
    left = (tree[0][0][0], '0')
    right = (tree[0][0][1], '1')
    queue.append(left)
    queue.append(right)

    #traversing the tree, encoding the leaves
    while len(queue) > 0:
        node = queue.pop(0)
        if len(node[0]) > 1:
            queue.append((node[0][0], node[1] + '0'))
            queue.append((node[0][1], node[1] + '1'))
        else:
            mapping[node[0]] = node[1]

    return mapping
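# Usage sketch (illustrative), assuming `from operator import itemgetter` is
# in scope as the function above requires: more probable symbols receive
# codes no longer than less probable ones.
codes = mapping({'a': 0.5, 'b': 0.25, 'c': 0.25})
assert sorted(codes) == ['a', 'b', 'c']
assert len(codes['a']) <= len(codes['b'])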
def emit_conversions_module(f, to_upper, to_lower, to_title):
    f.write("pub mod conversions {")
    f.write("""
    use core::option::Option;
    use core::option::Option::{Some, None};

    pub fn to_lower(c: char) -> [char; 3] {
        match bsearch_case_table(c, to_lowercase_table) {
            None        => [c, '\\0', '\\0'],
            Some(index) => to_lowercase_table[index].1,
        }
    }

    pub fn to_upper(c: char) -> [char; 3] {
        match bsearch_case_table(c, to_uppercase_table) {
            None        => [c, '\\0', '\\0'],
            Some(index) => to_uppercase_table[index].1,
        }
    }

    fn bsearch_case_table(c: char, table: &'static [(char, [char; 3])]) -> Option<usize> {
        table.binary_search_by(|&(key, _)| key.cmp(&c)).ok()
    }

""")
    t_type = "&'static [(char, [char; 3])]"
    pfun = lambda x: "(%s,[%s,%s,%s])" % (
        escape_char(x[0]), escape_char(x[1][0]), escape_char(x[1][1]), escape_char(x[1][2]))
    emit_table(f, "to_lowercase_table",
               sorted(to_lower.items(), key=operator.itemgetter(0)),
               is_pub=False, t_type=t_type, pfun=pfun)
    emit_table(f, "to_uppercase_table",
               sorted(to_upper.items(), key=operator.itemgetter(0)),
               is_pub=False, t_type=t_type, pfun=pfun)
    f.write("}\n\n")
def tallyEachJob(placements):
    items = [item for sublist in placements for item in sublist]
    items.sort(key=itemgetter(1))
    remainWork = [reduce(lambda x, y: (x[0] + y[0], x[1]), group)
                  for _, group in groupby(items, key=itemgetter(1))]
    remainWork.sort(key=itemgetter(0))
    return dict([(y, x) for (x, y) in remainWork])
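# Usage sketch (illustrative), assuming reduce, groupby and itemgetter are
# imported in the module as the function above requires: placements is a
# list of lists of (work, job_id) pairs, and the result maps each job_id to
# its summed work.
placements = [[(2, 'a'), (1, 'b')], [(3, 'a')]]
assert tallyEachJob(placements) == {'a': 5, 'b': 1}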
def aggregate_items(username, group_by, get_fields, active_filters, array=False):
    """Return a list of all books that satisfy the current filters"""
    cursor, conn = db_sql.connect('books.db')
    sql = ("SELECT " + group_by + ", " + ", ".join(get_fields) +
           " FROM " + username)
    query, paras = query_builder(active_filters)
    cursor.execute(sql + query, paras)
    data = [dict(x) for x in cursor.fetchall()]

    data_temp = []
    if array:
        for row in data:
            if row[group_by] == None:
                row[group_by] = ['']
            temp_field = row[group_by][1:]
            row[group_by] = row[group_by][0]
            for value in temp_field:
                data_temp.append(copy(row))
                data_temp[-1][group_by] = value
        data += data_temp

    data = sorted(data, key=operator.itemgetter(group_by))
    list1 = []
    for key, items in itertools.groupby(data, operator.itemgetter(group_by)):
        list1.append({'_id': key, 'books': list(items)})
    conn.close()
    return list1
def update_events(self, cr, uid, context):
    if context is None:
        context = {}

    calendar_event = self.pool['calendar.event']
    user_obj = self.pool['res.users']
    att_obj = self.pool['calendar.attendee']
    myPartnerID = user_obj.browse(cr, uid, uid, context=context).partner_id.id

    context_novirtual = context.copy()
    context_novirtual['virtual_id'] = False
    context_novirtual['active_test'] = False

    all_event_from_google = self.get_event_dict(cr, uid, context=context)
    all_new_event_from_google = all_event_from_google.copy()

    # Select all events from OpenERP which have been already synchronized in gmail
    my_att_ids = att_obj.search(cr, uid, [
        ('partner_id', '=', myPartnerID),
        ('google_internal_event_id', '!=', False),
    ], context=context_novirtual)

    event_to_synchronize = {}
    for att in att_obj.browse(cr, uid, my_att_ids, context=context):
        event = att.event_id
        base_event_id = att.google_internal_event_id.split('_')[0]

        if base_event_id not in event_to_synchronize:
            event_to_synchronize[base_event_id] = {}

        if att.google_internal_event_id not in event_to_synchronize[base_event_id]:
            event_to_synchronize[base_event_id][att.google_internal_event_id] = self.get_empty_synchro_summarize()

        event_to_synchronize[base_event_id][att.google_internal_event_id]['OE_attendee_id'] = att.id
        event_to_synchronize[base_event_id][att.google_internal_event_id]['OE_event'] = event
        event_to_synchronize[base_event_id][att.google_internal_event_id]['OE_found'] = True
        event_to_synchronize[base_event_id][att.google_internal_event_id]['OE_event_id'] = event.id
        event_to_synchronize[base_event_id][att.google_internal_event_id]['OE_isRecurrence'] = event.recurrency
        event_to_synchronize[base_event_id][att.google_internal_event_id]['OE_isInstance'] = bool(event.recurrent_id and event.recurrent_id > 0)
        event_to_synchronize[base_event_id][att.google_internal_event_id]['OE_update'] = event.oe_update_date
        event_to_synchronize[base_event_id][att.google_internal_event_id]['OE_status'] = event.active
        event_to_synchronize[base_event_id][att.google_internal_event_id]['OE_synchro'] = att.oe_synchro_date

    for event in all_event_from_google.values():
        event_id = event.get('id')
        base_event_id = event_id.split('_')[0]

        if base_event_id not in event_to_synchronize:
            event_to_synchronize[base_event_id] = {}

        if event_id not in event_to_synchronize[base_event_id]:
            event_to_synchronize[base_event_id][event_id] = self.get_empty_synchro_summarize()

        event_to_synchronize[base_event_id][event_id]['GG_event'] = event
        event_to_synchronize[base_event_id][event_id]['GG_found'] = True
        event_to_synchronize[base_event_id][event_id]['GG_isRecurrence'] = bool(event.get('recurrence', ''))
        event_to_synchronize[base_event_id][event_id]['GG_isInstance'] = bool(event.get('recurringEventId', 0))
        event_to_synchronize[base_event_id][event_id]['GG_update'] = event.get('updated', None)  # if deleted, no date without browse event
        if event_to_synchronize[base_event_id][event_id]['GG_update']:
            event_to_synchronize[base_event_id][event_id]['GG_update'] = \
                event_to_synchronize[base_event_id][event_id]['GG_update'].replace('T', ' ').replace('Z', '')
        event_to_synchronize[base_event_id][event_id]['GG_status'] = (event.get('status') != 'cancelled')

    ######################
    #   PRE-PROCESSING   #
    ######################
    for base_event in event_to_synchronize:
        for current_event in event_to_synchronize[base_event]:
            event = event_to_synchronize[base_event][current_event]

            # If event are already in Gmail and in OpenERP
            if event['OE_found'] and event['GG_found']:
                # If the event has been deleted from one side, we delete on other side !
                if event['OE_status'] != event['GG_status']:
                    event['td_action'] = "DELETE"
                    event['td_source'] = (event['OE_status'] and "OE") or (event['GG_status'] and "GG")
                # If event is not deleted !
                elif event['OE_status'] and event['GG_status']:
                    if event['OE_update'].split('.')[0] != event['GG_update'].split('.')[0]:
                        if event['OE_update'] < event['GG_update']:
                            event['td_source'] = 'GG'
                        elif event['OE_update'] > event['GG_update']:
                            event['td_source'] = 'OE'

                        if event['td_action'] != "None":
                            if event['%s_isRecurrence' % event['td_source']]:
                                if event['%s_status' % event['td_source']]:
                                    event['td_action'] = "UPDATE"
                                    event['td_comment'] = 'Only need to update, because i\'m active'
                                else:
                                    event['td_action'] = "EXCLUDE"
                                    event['td_comment'] = 'Need to Exclude (Me = First event from recurrence) from recurrence'
                            elif event['%s_isInstance' % event['td_source']]:
                                event['td_action'] = "UPDATE"
                                event['td_comment'] = 'Only need to update, because already an exclu'
                            else:
                                event['td_action'] = "UPDATE"
                                event['td_comment'] = 'Simply Update... I\'m a single event'
                    else:
                        if not event['OE_synchro'] or event['OE_synchro'].split('.')[0] < event['OE_update'].split('.')[0]:
                            event['td_source'] = 'OE'
                            event['td_action'] = "UPDATE"
                            event['td_comment'] = 'Event already updated by another user, but not synchro with my google calendar'
                        else:
                            event['td_action'] = "None"
                            event['td_comment'] = 'Not update needed'
                else:
                    event['td_action'] = "None"
                    event['td_comment'] = "Both are already deleted"

            # New in openERP... Create on create_events of synchronize function
            elif event['OE_found'] and not event['GG_found']:
                # Has been deleted from gmail
                if event['OE_status']:
                    event['td_source'] = 'OE'
                    event['td_action'] = 'DELETE'
                    event['td_comment'] = 'Removed from GOOGLE ?'
                else:
                    event['td_action'] = "None"
                    event['td_comment'] = "Already Deleted in gmail and unlinked in OpenERP"

            elif event['GG_found'] and not event['OE_found']:
                event['td_source'] = 'GG'
                if not event['GG_status'] and not event['GG_isInstance']:
                    # don't need to make something... because event has been created and deleted before the synchronization
                    event['td_action'] = 'None'
                    event['td_comment'] = 'Nothing to do... Create and Delete directly'
                else:
                    if event['GG_isInstance']:
                        if event['%s_status' % event['td_source']]:
                            event['td_action'] = "EXCLUDE"
                            event['td_comment'] = 'Need to create the new exclu'
                        else:
                            event['td_action'] = "EXCLUDE"
                            event['td_comment'] = 'Need to copy and Exclude'
                    else:
                        event['td_action'] = "CREATE"
                        event['td_comment'] = 'New EVENT CREATE from GMAIL'

    ######################
    #      DO ACTION     #
    ######################
    for base_event in event_to_synchronize:
        event_to_synchronize[base_event] = sorted(event_to_synchronize[base_event].iteritems(),
                                                  key=operator.itemgetter(0))
        for current_event in event_to_synchronize[base_event]:
            cr.commit()
            event = current_event[1]

            #############
            ### DEBUG ###
            #############
            # if event['td_action'] and event['td_action'] != 'None':
            #     print " Real Event %s (%s)" % (current_event[0], event['OE_event_id'])
            #     print " Found OE:%5s vs GG: %5s" % (event['OE_found'], event['GG_found'])
            #     print " Recurrence OE:%5s vs GG: %5s" % (event['OE_isRecurrence'], event['GG_isRecurrence'])
            #     print " Instance OE:%5s vs GG: %5s" % (event['OE_isInstance'], event['GG_isInstance'])
            #     print " Synchro OE: %10s " % (event['OE_synchro'])
            #     print " Update OE: %10s " % (event['OE_update'])
            #     print " Update GG: %10s " % (event['GG_update'])
            #     print " Status OE:%5s vs GG: %5s" % (event['OE_status'], event['GG_status'])
            #     print " Action %s" % (event['td_action'])
            #     print " Source %s" % (event['td_source'])
            #     print " comment %s" % (event['td_comment'])

            context['curr_attendee'] = event.get('OE_attendee_id', False)

            actToDo = event['td_action']
            actSrc = event['td_source']

            if not actToDo:
                # Note: the original raised a bare string here, which is not a
                # valid exception type; wrapped in Exception to keep it raisable.
                raise Exception("#!? WHAT I NEED TO DO ????")
            else:
                if actToDo == 'None':
                    continue
                elif actToDo == 'CREATE':
                    context_tmp = context.copy()
                    context_tmp['NewMeeting'] = True
                    if actSrc == 'GG':
                        res = self.update_from_google(cr, uid, False, event['GG_event'], "create", context=context_tmp)
                        event['OE_event_id'] = res
                        meeting = calendar_event.browse(cr, uid, res, context=context)
                        attendee_record_id = att_obj.search(cr, uid, [('partner_id', '=', myPartnerID),
                                                                      ('event_id', '=', res)], context=context)
                        self.pool.get('calendar.attendee').write(cr, uid, attendee_record_id,
                                                                 {'oe_synchro_date': meeting.oe_update_date,
                                                                  'google_internal_event_id': event['GG_event']['id']},
                                                                 context=context_tmp)
                    elif actSrc == 'OE':
                        raise Exception("Should be never here, creation for OE is done before update !")
                    #TODO Add to batch
                elif actToDo == 'UPDATE':
                    if actSrc == 'GG':
                        self.update_from_google(cr, uid, event['OE_event'], event['GG_event'], 'write', context)
                    elif actSrc == 'OE':
                        self.update_to_google(cr, uid, event['OE_event'], event['GG_event'], context)
                elif actToDo == 'EXCLUDE':
                    if actSrc == 'OE':
                        self.delete_an_event(cr, uid, current_event[0], context=context)
                    elif actSrc == 'GG':
                        new_google_event_id = event['GG_event']['id'].split('_')[1]
                        if 'T' in new_google_event_id:
                            new_google_event_id = new_google_event_id.replace('T', '')[:-1]
                        else:
                            new_google_event_id = new_google_event_id + "000000"

                        if event['GG_status']:
                            parent_event = {}
                            parent_event['id'] = "%s-%s" % (event_to_synchronize[base_event][0][1].get('OE_event_id'),
                                                            new_google_event_id)
                            res = self.update_from_google(cr, uid, parent_event, event['GG_event'], "copy", context)
                        else:
                            if event_to_synchronize[base_event][0][1].get('OE_event_id'):
                                parent_oe_id = event_to_synchronize[base_event][0][1].get('OE_event_id')
                                calendar_event.unlink(cr, uid, "%s-%s" % (parent_oe_id, new_google_event_id),
                                                      unlink_level=1, context=context)
                elif actToDo == 'DELETE':
                    if actSrc == 'GG':
                        self.delete_an_event(cr, uid, current_event[0], context=context)
                    elif actSrc == 'OE':
                        calendar_event.unlink(cr, uid, event['OE_event_id'], unlink_level=0, context=context)
    return True
for k, v in dict_t.items():
    if v <= 4 and k != 'APX-HE':
        for i in range(v):
            types_final.append('Altro')
    else:
        for i in range(v):
            types_final.append(k)

#new dictionary with "Altro" added
dict_t2 = dict()
for t in types_final:
    dict_t2[t] = dict_t2.get(t, 0) + 1

#sort dictionary by value
sorted_d = dict(
    sorted(dict_t2.items(), key=operator.itemgetter(1), reverse=True))
#print(sorted_d)

names = list(sorted_d.keys())
values = list(sorted_d.values())
hunds = [100 for x in values]

# Set position of bar on y axis
r1 = np.arange(len(hunds))
r1 = [x for x in r1]
r2 = [x + (barWidth / len(names)) for x in r1]

#plot
plt.bar(names, values, width=barWidth, alpha=0.8)
plt.xticks(fontsize=textsize)
def load_vina_results(project_file, group, max_load, max_rank, interactions_check):
    # Load project data
    with open(project_file) as _project_file:
        project_data = json.load(_project_file)

    # Load target
    target_name = f"{group}.target"
    if project_data["flexible"]:
        cmd.load(project_data["rigid_pdbqt"], target_name)
    else:
        cmd.load(project_data["target_pdbqt"], target_name)
    cmd.group(group)
    cmd.group(group, target_name)

    # Show box
    box_name = f"{group}.box"
    display_box(
        box_name,
        (
            project_data["center_x"] + project_data["size_x"] / 2,
            project_data["center_y"] + project_data["size_y"] / 2,
            project_data["center_z"] + project_data["size_z"] / 2,
        ),
        (
            project_data["center_x"] - project_data["size_x"] / 2,
            project_data["center_y"] - project_data["size_y"] / 2,
            project_data["center_z"] - project_data["size_z"] / 2,
        ),
    )
    cmd.group(group, box_name)

    # Parse results
    results_dir = project_data["results_dir"]
    results = itertools.chain.from_iterable(
        map(parse_vina_log, glob(f"{results_dir}/poses/*.pdbqt")))
    results = sorted(results, key=itemgetter("affinity"))

    cache = set()
    objects = set()
    count = 0
    for pose in results:
        # Ignore poses whose mode is greater than max
        if pose["mode"] > max_rank:
            continue

        # Load molecule into cache
        cache_name = cmd.get_legal_name(pose["filename"].replace(".", "_"))
        if cache_name not in cache:
            cmd.load(pose["filename"], cache_name)
            cache.add(cache_name)

        # Compute object names
        score = int(-10 * pose["affinity"])
        state = pose["mode"]
        base_name = f'{group}.{pose["name"]}_{pose["mode"]}_{score}'
        obj_name = f"{base_name}.mol"
        polar_name = f"{base_name}.polar"

        # Create group
        cmd.group(base_name)

        # Create molecule object
        cmd.create(obj_name, cache_name, state, 1)
        cmd.group(base_name, obj_name)

        if interactions_check:
            cmd.distance(polar_name, target_name, obj_name, 2)
            cmd.group(base_name, polar_name)

        objects.add(obj_name)
        count += 1
        if count >= max_load:
            break

    cmd.delete("delete " + " ".join(cache))
def main():
    CORELEN2 = (2 * 044 * 02000)    # Block II
    CORELEN1 = (2 * 034 * 02000)    # Block I

    global options

    parser = OptionParser("usage: %prog [options] core1 core2")
    parser.add_option("-p", "--by-page", action="store_true", dest="bypage", default=False,
                      help="Sort differences by page number.")
    parser.add_option("-c", "--no-checksums", action="store_false", dest="checksums", default=True,
                      help="Discard differences in checksums.")
    parser.add_option("-N", "--no-super", action="store_true", dest="noSuper", default=False,
                      help="Discard differences in which one word has 100 in bits 5,6,7 and the other has 011.")
    parser.add_option("-S", "--only-super", action="store_true", dest="onlySuper", default=False,
                      help="Show only differences involving 100 vs. 011 in bits 5,6,7.")
    parser.add_option("-Z", "--no-zero", action="store_true", dest="noZero", default=False,
                      help="Discard differences in which the word from the 2nd file is 00000.")
    parser.add_option("-s", "--stats", action="store_true", dest="stats", default=False,
                      help="Print statistics.")
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
                      help="Print extra information.")
    parser.add_option("-o", "--output", dest="outfilename", metavar="FILE",
                      help="Write output to file.")
    parser.add_option("-a", "--annotate", action="store_true", dest="annotate", default=False,
                      help="Output a modified listing annotated with core differences.")
    (options, args) = parser.parse_args()

    options.analyse = True

    options.outfile = None
    if options.outfilename:
        options.outfile = open(options.outfilename, "w")
    else:
        options.outfile = sys.stdout

    if len(args) < 2:
        parser.error("Two core files must be supplied!")
        sys.exit(1)

    cores = []
    for arg in args:
        cores.append(arg)
        if not os.path.isfile(arg):
            parser.error("File \"%s\" does not exist" % arg)
            sys.exit(1)

    sizes = []
    for core in cores:
        sizes.append(os.path.getsize(core))

    if sizes[0] != sizes[1]:
        parser.error("Core files are not the same size!")
        sys.exit(1)

    if sizes[0] != CORELEN2 and sizes[0] != CORELEN1:
        parser.error("Core files are incorrect length, must be %d (Block II) or %d (Block I) bytes!" % (CORELEN2, CORELEN1))
        sys.exit(1)

    log("yaAGC Core Rope Differencer")
    log("")
    log("Left core file: %s" % cores[0])
    log("Right core file: %s" % cores[1])

    leftcore = open(cores[0], "rb")
    rightcore = open(cores[1], "rb")

    leftdir = os.path.abspath(os.path.dirname(cores[0]))
    leftlst = os.path.join(leftdir, "*.lst")
    rightdir = os.path.abspath(os.path.dirname(cores[1]))
    rightlst = os.path.join(rightdir, "*.lst")

    lfiles = glob.glob(leftlst)
    lfiles.extend(glob.glob(rightlst))

    # Remove duplicates.
    ldict = {}
    for x in lfiles:
        ldict[x] = x
    lfiles = ldict.values()

    if len(lfiles) == 0:
        print >>sys.stderr, "Warning: no listing file for analysis!"
        options.analyse = False

    listfile = None
    if options.analyse:
        if len(lfiles) > 1:
            for l in lfiles:
                if l.endswith("MAIN.lst"):
                    lfiles.remove(l)
            if len(lfiles) > 1:
                print >>sys.stderr, "Warning: multiple listing files, using %s!" % (lfiles[0])
        listfile = lfiles[0]
        if not os.path.isfile(listfile):
            parser.error("File \"%s\" does not exist" % listfile)
            sys.exit(1)
        log("")
        log("Listing: %s" % listfile)
        log("Build: %s" % os.path.basename(os.path.dirname(listfile).split('.')[0]))
        log("Analysing listing file... ", verbose=True)
        blocks = listing_analyser.analyse(listfile)

    options.annofile = None
    if options.annotate:
        if listfile == None:
            sys.exit("Annotate option specified, but no input listing file found")
        afilename = listfile
        afilename = afilename.replace(".lst", ".anno.txt")
        options.annofile = open(afilename, 'w')

    diffcount = {}
    difftotal = 0

    modlist = []
    srcfiles = glob.glob(os.path.join(leftdir, "*.agc"))
    srcfiles.remove(os.path.join(leftdir, "MAIN.agc"))
    if "Templates.agc" in srcfiles:
        srcfiles.remove(os.path.join(leftdir, "Template.agc"))
    for srcfile in srcfiles:
        modlist.append(os.path.basename(srcfile).split('.')[0])
    for module in modlist:
        diffcount[module] = 0

    log("Reading MAIN.agc... ", verbose=True)
    includelist = []
    mainfile = open(os.path.join(leftdir, "MAIN.agc"), "r")
    mainlines = mainfile.readlines()
    for line in mainlines:
        if line.startswith('$'):
            module = line.split()[0].split('.')[0][1:]
            includelist.append(module)
    mainfile.close()

    diffs = []
    lines = []

    log("Comparing core image files... ", verbose=True)
    try:
        while True:
            leftdata = leftcore.read(2)
            rightdata = rightcore.read(2)
            if not leftdata or not rightdata:
                break
            # Read 16-bit word and unpack into 2 byte tuple, native endianness.
            leftword = struct.unpack("BB", leftdata)
            rightword = struct.unpack("BB", rightdata)
            if leftword[0] != rightword[0] or leftword[1] != rightword[1]:
                # Words differ. Check super bits.
                leftval = (leftword[0] << 7) | (leftword[1] >> 1)
                rightval = (rightword[0] << 7) | (rightword[1] >> 1)
                if options.noZero and rightval == 0:
                    continue
                if ((leftval ^ rightval) & 0160) == 0160 and ((leftval & 0160) == 0100 or (leftval & 0160) == 0060):
                    if options.noSuper:
                        continue
                else:
                    if options.onlySuper:
                        continue
                i = (leftcore.tell() - 2) / 2
                offset = 02000 + (i % 02000)
                bank = i / 02000
                if bank < 4:
                    bank ^= 2
                line = "%06o (" % i
                if i < 04000:
                    address = " %04o" % (i + 04000)
                else:
                    address = "%02o,%04o" % (bank, offset)
                line += "%s) " % address
                line += "%05o %05o" % (leftval, rightval)
                if options.analyse:
                    block = listing_analyser.findBlock(blocks, i)
                    if block:
                        line += " " + block.getInfo()
                        diffcount[block.module] += 1
                diffs.append(CoreDiff(i, address, leftval, rightval))
                difftotal += 1
                lines.append(line)
    finally:
        leftcore.close()
        rightcore.close()

    log("%d core image differences" % (difftotal), verbose=True)

    lines = {}
    buggers = []
    module = None
    pagenum = 0
    address = 0
    checkdiffs = 0
    linenum = 0

    log("Building module/page/line list... ", verbose=True)
    for line in open(listfile, "r"):
        linenum += 1
        elems = line.split()
        if len(elems) > 0:
            if not line.startswith(' '):
                if "# Page " in line and "scans" not in line:
                    pagenum = line.split()[3]
                    if pagenum.isdigit():
                        pagenum = int(pagenum)
                if elems[0][0].isdigit():
                    if len(elems) > 1:
                        if elems[1].startswith('$'):
                            module = elems[1][1:].split('.')[0]
                        else:
                            if len(elems) > 2:
                                if elems[1][0].isdigit() and elems[2][0].isdigit() and len(elems[2]) == 5:
                                    address = elems[1]
                                    lines[address] = (module, pagenum, linenum, line)
                                    if len(elems) > 3:
                                        # Handle 2-word quantities, yaYUL outputs listing for the two combined at the address of the first.
                                        if elems[3][0].isdigit() and len(elems[3]) == 5:
                                            if "," in address:
                                                bank = int(address.split(',')[0], 8)
                                                offset = int(address.split(',')[1], 8)
                                                offset += 1
                                                address = "%02o,%04o" % (bank, offset)
                                            else:
                                                offset = int(address, 8)
                                                offset += 1
                                                address = "%04o" % offset
                                            lines[address] = (module, pagenum, linenum, line)
        if line.startswith("Bugger"):
            buggers.append(line)

    log("Setting diff locations... ", verbose=True)
    for diff in diffs:
        address = diff.address.strip()
        if address in lines.keys():
            (module, pagenum, linenum, line) = lines[address]
            diff.setloc(pagenum, module, linenum, line)
        elif diff.srcline == None:
            foundBugger = False
            for bugger in buggers:
                bval = bugger.split()[2]
                baddr = bugger.split()[4]
                if baddr.endswith('.'):
                    baddr = baddr[:-1]
                if address == baddr:
                    diff.setloc(0, "Checksum", 0, "%s%s%s%s" % (15 * ' ', baddr, 11 * ' ', bval))
                    checkdiffs += 1
                    foundBugger = True
                    break
            if not foundBugger:
                print >>sys.stderr, "Error: address %s not found in listing file" % (address)
                log("Error: address %s not found in listing file" % (address))
        else:
            print >>sys.stderr, "Error: address %s not found in listing file" % (address)
            log("Error: address %s not found in listing file" % (address))

    log("")
    log("%s" % ("Total core differences: %d (checksums=%d)" % (difftotal, checkdiffs)))

    # Sort by page/line.
    if options.bypage == True:
        newdiffs = []
        diffIndex = {}
        diffIndex[0] = []
        for diff in diffs:
            if diff.pagenum is None or diff.pagenum == 0:
                diffIndex[0].append(diff)
                continue
            if diff.pagenum not in diffIndex.keys():
                diffIndex[diff.pagenum] = {}
            diffIndex[diff.pagenum][diff.linenum] = diff
        pages = diffIndex.keys()
        pages.sort()
        for diff in diffIndex[0]:
            newdiffs.append(diff)
        for page in pages[1:]:
            lines = diffIndex[page].keys()
            lines.sort()
            for line in lines:
                newdiffs.append(diffIndex[page][line])
        diffs = newdiffs

    log("%s" % ("Source difference lines: %d" % len(diffs)))
    log("")

    if difftotal > 0:
        log("Core address Left Right Page Module Line Number Address Source")
        log("---------------- ----- ----- ---- ------------------------------------------------ -------------- ------- ------------------------------------------------")
        log("")
        for diff in diffs:
            if options.checksums == True or (options.checksums == False and diff.module != "Checksum"):
                log(diff.__str__())

    if options.annofile:
        linenums = []
        diffsbyline = {}
        for diff in diffs:
            if diff.linenum != None and diff.linenum != 0:
                linenums.append(diff.linenum)
                diffsbyline[diff.linenum] = diff
        linenums.sort()
        diffindex = 0
        linenum = 0
        for line in open(listfile, "r"):
            linenum += 1
            if diffindex < len(linenums) and linenum == linenums[diffindex]:
                diff = diffsbyline[linenum]
                print >>options.annofile
                print >>options.annofile, ">>> Core error %d of %d at %s: expected %05o, got %05o" % (diffindex + 1, len(linenums), diff.address, diff.leftval, diff.rightval)
                diffindex += 1
            print >>options.annofile, line,

    if options.stats:
        diffblocks = []
        index = 0
        while index < len(diffs) - 1:
            cur = index
            end = index + 1
            while diffs[end].coreaddr == diffs[cur].coreaddr + 1:
                cur += 1
                end += 1
            length = end - index - 1
            if length > 1:
                diffblocks.append((diffs[index], length))
            index = end
        diffblocks.sort()

        if len(diffblocks) > 0:
            log("")
            log("")
            log("Difference blocks: (sorted by length, ignoring single isolated differences)")
            #log("-" * 80)
            log("")
            log("Core address Diffs Module ")
            log("---------------- ----- ---------------------------------------------------- ")
            for (diff, length) in sorted(diffblocks, key=operator.itemgetter(1), reverse=True):
                address = diff.address
                if "," in address:
                    bank = int(address.split(',')[0], 8)
                    offset = int(address.split(',')[1], 8)
                    i = 010000 + bank * 02000 + offset
                else:
                    i = int(address, 8)
                line = "%06o (" % i
                offset = 02000 + (i % 02000)
                bank = i / 02000
                if bank < 4:
                    bank ^= 2
                if i < 04000:
                    line += " %04o) " % (i + 04000)
                else:
                    line += "%02o,%04o) " % (bank, offset)
                line += "%6d" % length
                block = listing_analyser.findBlock(blocks, i)
                if block:
                    line += " " + block.getInfo()
                log(line)
            log("-" * 80)

    counts = []
    for module in diffcount:
        counts.append((module, diffcount[module]))
    counts.sort()

    if options.stats:
        log("")
        log("Per-module differences: (sorted by errors)")
        log("-" * 80)
        for count in sorted(counts, key=operator.itemgetter(1), reverse=True):
            log("%-48s %6d" % count)
        log("-" * 80)
        log("")
        log("Per-module differences: (sorted by module)")
        log("-" * 80)
        for count in counts:
            log("%-48s %6d" % count)
        log("-" * 80)
        log("")
        log("Per-module differences: (sorted by include order)")
        log("-" * 80)
        for module in includelist:
            log("%-48s %6d" % (module, diffcount[module]))
        log("-" * 80)

    log("Done", verbose=True)

    if options.annofile:
        options.annofile.close()

    if options.outfile:
        options.outfile.close()

    if difftotal > 0:
        if options.outfilename:
            print "Core differences are in", options.outfilename
        else:
            print "Core differences found"
    else:
        print "No core differences found"
top_words = {}
InverseDocumentFrequency = {}
for asin in dictionary_per_product:
    for word in dictionary_per_product[asin]:
        if not word in InverseDocumentFrequency:
            InverseDocumentFrequency[word] = 0.0
        InverseDocumentFrequency[word] += 1.0

for asin in dictionary_per_product:
    top_words[asin] = set()
    for word in dictionary_per_product[asin]:
        dictionary_per_product[asin][word] = dictionary_per_product[asin][word] * \
            math.log(len(dictionary_per_product) / InverseDocumentFrequency[word])
    dpp_sorted = sorted(dictionary_per_product[asin].items(),
                        key=operator.itemgetter(1), reverse=True)
    for word in dpp_sorted:
        top_words[asin].add(word)
        if len(top_words[asin]) > 30:
            break
    print asin, top_words[asin]
def getOpFromLocation(self, i, j):
    '''
    Insert, Delete, Substitution, No Change = range(4)

    return the direction that traceback moves
    0: vertical movement, insertion
    1: horizontal movement, deletion
    2: diagonal movement, substitution
    3: diagonal movement, no change

    raises a ValueError if i == 0 and j == 0.

    >>> target = stream.Stream()
    >>> source = stream.Stream()

    >>> note1 = note.Note("C4")
    >>> note2 = note.Note("D4")
    >>> note3 = note.Note("C4")
    >>> note4 = note.Note("E4")

    >>> target.append([note1, note2, note3, note4])
    >>> source.append([note1, note2, note3])

    >>> sa = alpha.analysis.aligner.StreamAligner(target, source)
    >>> sa.makeHashedStreams()
    >>> sa.setupDistanceMatrix()
    >>> sa.populateDistanceMatrix()
    >>> sa.distanceMatrix
    array([[0, 2, 4, 6],
           [2, 0, 2, 4],
           [4, 2, 0, 2],
           [6, 4, 2, 0],
           [8, 6, 4, 2]])

    >>> sa.getOpFromLocation(4, 3)
    <ChangeOps.Insertion: 0>

    >>> sa.getOpFromLocation(2, 2)
    <ChangeOps.NoChange: 3>

    >>> sa.getOpFromLocation(0, 2)
    <ChangeOps.Deletion: 1>

    >>> sa.distanceMatrix[0][0] = 1
    >>> sa.distanceMatrix
    array([[1, 2, 4, 6],
           [2, 0, 2, 4],
           [4, 2, 0, 2],
           [6, 4, 2, 0],
           [8, 6, 4, 2]])

    >>> sa.getOpFromLocation(1, 1)
    <ChangeOps.Substitution: 2>

    >>> sa.getOpFromLocation(0, 0)
    Traceback (most recent call last):
    ValueError: No movement possible from the origin
    '''
    possibleMoves = self.getPossibleMovesFromLocation(i, j)

    if possibleMoves[0] is None:
        if possibleMoves[1] is None:
            raise ValueError('No movement possible from the origin')
        else:
            return ChangeOps.Deletion
    elif possibleMoves[1] is None:
        return ChangeOps.Insertion

    currentCost = self.distanceMatrix[i][j]
    minIndex, minNewCost = min(enumerate(possibleMoves), key=operator.itemgetter(1))
    if currentCost == minNewCost:
        return ChangeOps.NoChange
    else:
        return ChangeOps(minIndex)
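# Illustrative aside (hypothetical costs): min(enumerate(...), key=itemgetter(1))
# returns an (index, cost) pair, which is how the method above maps the cheapest
# predecessor onto a ChangeOps member.
from operator import itemgetter

possibleMoves = [4, 2, 7]
minIndex, minNewCost = min(enumerate(possibleMoves), key=itemgetter(1))
assert (minIndex, minNewCost) == (1, 2)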
def root_index_lookup(tbl_root, i, j):
    return tbl_root[i][j].value


if __name__ == '__main__':
    start = time.time()

    # w = ["A", "B", "C", "D", "E", "F", "G"]
    # f = [3, 5, 1, 2, 10, 6, 5]

    items = []
    with open("frequencies2.txt") as fp:
        for line in fp:
            w, f = line.rstrip().split()
            f = int(f)
            items.append((w, f))
    print("uploaded the file")

    items.sort(key=operator.itemgetter(0))
    w = [w for w, _ in items]
    f = [f for _, f in items]

    l = len(w)
    MAX_COST = 2 ** 32
    tbl_cost = [[0] * l for _ in range(l)]
    tbl_sum = [[0] * l for _ in range(l)]
    tbl_root = [[None] * l for _ in range(l)]
    print("finished creating three tables")

    for i in range(l):
        for j in range(i, l):
            tbl_sum[i][j] += sum_lookup(tbl_sum, i, j - 1) + f[j]
    # printTable(tbl_sum)
def get(self, id):
    id = int(id)
    if id == 0:
        id = MAXINT

    current_user_id = self.current_user_id
    result, last_id = render_feed_by_zsite_id(current_user_id, PAGE_LIMIT, id)

    result = tuple(
        (i, tuple(g)) for i, g in groupby(result, itemgetter(0))
    )
    zsite_id_set = set(i[0] for i in result)
    c_dict = career_dict(zsite_id_set)

    r = []
    if result:
        site_id_set = set()
        for zsite_id, item_list in result:
            zsite = Zsite.mc_get(zsite_id)
            t = []
            for i in item_list:
                id = i[1]
                cid = i[4]
                rid = i[5]
                site_id = i[6]
                if site_id:
                    site_id_set.add(site_id)
                if len(i) >= FEED_TUPLE_DEFAULT_LEN:
                    after = i[FEED_TUPLE_DEFAULT_LEN:]
                    i = i[:FEED_TUPLE_DEFAULT_LEN]
                else:
                    after = None
                if cid not in (CID_WORD, CID_EVENT):
                    i.extend(zsite_tag_id_tag_name_by_po_id(zsite_id, id))
                if after:
                    i.extend(after)
                tag_list = tag_name_id_list_by_po_id(id)
                i.append(tag_list)
                t.append(i[1:])
            unit, title = c_dict[zsite_id]
            if zsite:
                r.append((
                    zsite.cid,
                    zsite.name,
                    zsite.link,
                    unit,
                    title,
                    pic_url_with_default(zsite_id, '219'),
                    t
                ))
            else:
                print 'feed_rm %s zsite_id %s' % (id, zsite_id)
                feed_rm(id)
        r.append(zsite_name_id_dict(site_id_set))

    r.append(last_id)
    result = dumps(r)
    self.finish(result)
def split_textline(table, textline, direction, flag_size=False, strip_text=''):
    """Splits PDFMiner LTTextLine into substrings if it spans across
    multiple rows/columns.

    Parameters
    ----------
    table : camelot.core.Table
    textline : object
        PDFMiner LTTextLine object.
    direction : string
        Direction of the PDFMiner LTTextLine object.
    flag_size : bool, optional (default: False)
        Whether or not to highlight a substring using <s></s>
        if its size is different from rest of the string.
        (Useful for super and subscripts.)
    strip_text : str, optional (default: '')
        Characters that should be stripped from a string before
        assigning it to a cell.

    Returns
    -------
    grouped_chars : list
        List of tuples of the form (idx, text) where idx is the index
        of row/column and text is an lttextline substring.

    """
    idx = 0
    cut_text = []
    bbox = textline.bbox
    try:
        if direction == 'horizontal' and not textline.is_empty():
            x_overlap = [i for i, x in enumerate(table.cols)
                         if x[0] <= bbox[2] and bbox[0] <= x[1]]
            r_idx = [j for j, r in enumerate(table.rows)
                     if r[1] <= (bbox[1] + bbox[3]) / 2 <= r[0]]
            r = r_idx[0]
            x_cuts = [(c, table.cells[r][c].x2) for c in x_overlap
                      if table.cells[r][c].right]
            if not x_cuts:
                x_cuts = [(x_overlap[0], table.cells[r][-1].x2)]
            for obj in textline._objs:
                row = table.rows[r]
                for cut in x_cuts:
                    if isinstance(obj, LTChar):
                        if (row[1] <= (obj.y0 + obj.y1) / 2 <= row[0] and
                                (obj.x0 + obj.x1) / 2 <= cut[1]):
                            cut_text.append((r, cut[0], obj))
                            break
                    elif isinstance(obj, LTAnno):
                        cut_text.append((r, cut[0], obj))
        elif direction == 'vertical' and not textline.is_empty():
            y_overlap = [j for j, y in enumerate(table.rows)
                         if y[1] <= bbox[3] and bbox[1] <= y[0]]
            c_idx = [i for i, c in enumerate(table.cols)
                     if c[0] <= (bbox[0] + bbox[2]) / 2 <= c[1]]
            c = c_idx[0]
            y_cuts = [(r, table.cells[r][c].y1) for r in y_overlap
                      if table.cells[r][c].bottom]
            if not y_cuts:
                y_cuts = [(y_overlap[0], table.cells[-1][c].y1)]
            for obj in textline._objs:
                col = table.cols[c]
                for cut in y_cuts:
                    if isinstance(obj, LTChar):
                        if (col[0] <= (obj.x0 + obj.x1) / 2 <= col[1] and
                                (obj.y0 + obj.y1) / 2 >= cut[1]):
                            cut_text.append((cut[0], c, obj))
                            break
                    elif isinstance(obj, LTAnno):
                        cut_text.append((cut[0], c, obj))
    except IndexError:
        return [(-1, -1, textline.get_text())]
    grouped_chars = []
    for key, chars in groupby(cut_text, itemgetter(0, 1)):
        if flag_size:
            grouped_chars.append((key[0], key[1],
                                  flag_font_size([t[2] for t in chars],
                                                 direction,
                                                 strip_text=strip_text)))
        else:
            gchars = [t[2].get_text() for t in chars]
            grouped_chars.append((key[0], key[1],
                                  ''.join(gchars).strip(strip_text)))
    return grouped_chars
# print(trace)

### profile trace ###
(_, stack_distances, line_accesses) = trace_profile(
    trace, args.trace_enable_padding
)
stack_distances.reverse()
line_accesses.reverse()
# print(line_accesses)
# print(stack_distances)

### compute probability distribution ###
# count items
l = len(stack_distances)
dc = sorted(
    collections.Counter(stack_distances).items(), key=operator.itemgetter(0)
)

# create a distribution
list_sd = list(map(lambda tuple_x_k: tuple_x_k[0], dc))  # x = tuple_x_k[0]
dist_sd = list(
    map(lambda tuple_x_k: tuple_x_k[1] / float(l), dc)
)  # k = tuple_x_k[1]
cumm_sd = []  # np.cumsum(dc).tolist() #prefixsum
for i, (_, k) in enumerate(dc):
    if i == 0:
        cumm_sd.append(k / float(l))
    else:
        # add the 2nd element of the i-th tuple in the dist_sd list
        cumm_sd.append(cumm_sd[i - 1] + (k / float(l)))
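The commented-out np.cumsum hint above can be made concrete. A hedged, self-contained sketch of the same distribution and prefix sum built with numpy; the sample stack_distances values are illustrative, not from the original trace.

# Equivalent construction of dist_sd / cumm_sd with numpy (illustrative data).
import collections
import operator

import numpy as np

stack_distances = [1, 1, 2, 3, 3, 3]
dc = sorted(collections.Counter(stack_distances).items(),
            key=operator.itemgetter(0))

l = float(len(stack_distances))
list_sd = [x for x, _ in dc]            # distinct stack distances
dist_sd = [k / l for _, k in dc]        # probability of each distance
cumm_sd = np.cumsum(dist_sd).tolist()   # running (cumulative) sum

print(list_sd)   # [1, 2, 3]
print(dist_sd)   # [0.333..., 0.166..., 0.5]
print(cumm_sd)   # [0.333..., 0.5, 1.0]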
def sharpen_region(self, region):
    data = self.dataset
    root = self._root
    nbins = self.n_bins

    xvar = yvar = zvar = None
    if 0 <= self.x_var_index < len(self.x_var_model):
        xvar = self.x_var_model[self.x_var_index]
    if 0 <= self.y_var_index < len(self.y_var_model):
        yvar = self.y_var_model[self.y_var_index]
    if 0 <= self.z_var_index < len(self.z_var_model):
        zvar = self.z_var_model[self.z_var_index]

    if data is None or xvar is None or yvar is None or root is None:
        return

    if not QRectF(*root.brect).intersects(region):
        return

    def bin_func(xbins, ybins):
        return grid_bin(data, xvar, yvar, xbins, ybins, zvar)

    def min_depth(node, region):
        if not region.intersects(QRectF(*node.brect)):
            return np.inf
        elif node.is_leaf:
            return 1
        elif node.is_empty:
            return 1
        else:
            xs, xe, ys, ye = bindices(node, region)
            children = node.children[xs:xe, ys:ye].ravel()
            contingency = node.contingencies[xs:xe, ys:ye]
            if contingency.ndim == 3:
                contingency = contingency.reshape(-1, contingency.shape[2])

            if any(ch is None and np.any(val)
                   for ch, val in zip(children, contingency)):
                return 1
            else:
                ch_depth = [
                    min_depth(ch, region) + 1
                    for ch in filter(is_not_none, children.flat)
                ]
                return min(ch_depth if ch_depth else [1])

    depth = min_depth(self._root, region)
    bw = self._sampling_width()
    nodes = self.select_nodes_to_sharpen(self._root, region, bw, depth + 1)

    def update_rects(node):
        scored = score_candidate_rects(node, region)
        ind1 = set(zip(*Node_nonzero(node)))
        ind2 = set(zip(*node.children.nonzero())) \
               if not node.is_leaf else set()
        ind = ind1 - ind2
        return [(score, r) for score, i, j, r in scored if (i, j) in ind]

    scored_rects = reduce(operator.iadd, map(update_rects, nodes), [])
    scored_rects = sorted(scored_rects, reverse=True,
                          key=operator.itemgetter(0))
    root = self._root
    update_time = time.time()

    with self.progressBar(len(scored_rects)) as progress_bar:
        for i, (_, rect) in enumerate(scored_rects):
            root = sharpen_region_recur(
                root, rect.intersected(region), nbins, depth + 1, bin_func
            )
            tick = time.time() - update_time
            if tick > 2.0:
                self.update_map(root)
                update_time = time.time()
            progress_bar.advance()

    self._root = root
    self._cache[xvar, yvar, zvar] = self._root
    self.update_map(self._root)
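A small sketch (toy data only) of the flatten-and-rank idiom used above: reduce(operator.iadd, ..., []) concatenates the per-node lists into one, and sorting on operator.itemgetter(0) in reverse order ranks the candidate rectangles by score. itertools.chain.from_iterable would be an equivalent way to flatten.

import operator
from functools import reduce
from itertools import chain

# Toy per-node results: lists of (score, rect) pairs.
per_node = [
    [(0.2, 'rect-a'), (0.9, 'rect-b')],
    [],
    [(0.5, 'rect-c')],
]

flat = reduce(operator.iadd, per_node, [])    # concatenate into a fresh list
# flat = list(chain.from_iterable(per_node))  # equivalent flattening
ranked = sorted(flat, reverse=True, key=operator.itemgetter(0))
print(ranked)   # [(0.9, 'rect-b'), (0.5, 'rect-c'), (0.2, 'rect-a')]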
def vote(self, candidates, subject, ballot):
    """
    Votes for the isolate(s) with the minimal compatibility distance

    :param candidates: Isolates to vote for
    :param subject: The component to place
    :param ballot: The vote ballot
    """
    # Subject component name
    component_name = subject.name

    # Preference for candidate: (number of components, candidate)
    preference = []

    # Neutral isolate (last resort)
    neutral_candidate = None

    # Prepare a dictionary: candidate -> components
    all_components = {}
    for candidate in candidates:
        components = sorted(component.name
                            for component in candidate.components)
        if not components and not candidate.name:
            # Found the neutral isolate (do not add it to 'all_components')
            neutral_candidate = candidate
        else:
            if component_name in components:
                # Found the isolate where the component already is
                components.remove(component_name)

            # Store information
            all_components[candidate] = components

    # Sort candidates by number of components already there
    sorted_candidates = [(len(content), candidate)
                         for candidate, content in all_components.items()]
    sorted_candidates.sort(key=lambda x: (-x[0], x[1].name))

    # Compute candidate preference (empty or OK)
    for _, candidate in sorted_candidates:
        # Analyze each candidate
        components = all_components[candidate]
        if not components:
            # No components, we're OK with it
            preference.append((0, candidate))
        else:
            # Ensure that the content of this isolate won't be a known
            # crashing solution
            future_content = set(components)
            future_content.add(component_name)
            for crash in self._crashes:
                if future_content.issuperset(crash):
                    # Solution is (a superset of) a crashing solution
                    _logger.info(
                        "Known bad solution for %s:\n%s\ndue to:\n%s",
                        component_name,
                        ', '.join(name for name in sorted(future_content)),
                        ', '.join(name for name in sorted(crash)))
                    ballot.append_against(candidate)
                    break
            else:
                # Not a crashing solution
                preference.append((len(components), candidate))

    # TODO: tweak vote preferences to reduce the number of moves
    if preference:
        # Sort results (greater is better: it gathers components)
        preference.sort(key=operator.itemgetter(0), reverse=True)
        _logger.info(
            "Vote preference for %s: %s", component_name,
            ', '.join(item[1].name or "Neutral" for item in preference))

        # Vote
        for _, candidate in preference:
            ballot.append_for(candidate)
    elif neutral_candidate is not None:
        # We voted for no one: vote for neutral
        _logger.info("Using neutral candidate for %s", component_name)
        ballot.append_for(neutral_candidate)

    # Lock our vote
    ballot.lock()
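A reduced sketch (invented candidate data, not the real ballot API) of the two core decisions in the vote above: candidates whose future content matches a known crashing combination are rejected via the for/else idiom, and the surviving candidates are ordered by how many components they already gather, using operator.itemgetter(0).

import operator

crashes = [{'comp.db', 'comp.cache'}]   # known bad combinations (toy data)
component_name = 'comp.cache'

# candidate name -> components already placed on it (toy data)
all_components = {
    'isolate-1': ['comp.db'],
    'isolate-2': [],
    'isolate-3': ['comp.web', 'comp.auth'],
}

preference, rejected = [], []
for candidate, components in sorted(all_components.items()):
    future_content = set(components)
    future_content.add(component_name)
    for crash in crashes:
        if future_content.issuperset(crash):
            rejected.append(candidate)   # would recreate a known crash
            break
    else:                                # no known crash matched
        preference.append((len(components), candidate))

preference.sort(key=operator.itemgetter(0), reverse=True)
print(preference)   # [(2, 'isolate-3'), (0, 'isolate-2')]
print(rejected)     # ['isolate-1']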
def find_golf():
    global bgr, depth, mask_gripper
    position = Point()
    position.x = 100
    position.z = 100
    position.y = 100
    # while not rospy.is_shutdown():
    if bgr is None:
        return position
    gray = cv.cvtColor(bgr, cv.COLOR_BGR2GRAY)
    # r,c = gray.shape
    # mask_gripper = cv.resize(mask_gripper, (r,c))
    bg = cv.medianBlur(gray, 61)
    fg = cv.medianBlur(gray, 5)
    sub_sign = np.int16(fg) - np.int16(bg)
    sub_pos = np.clip(sub_sign.copy(), 0, sub_sign.copy().max())
    sub_neg = np.clip(sub_sign.copy(), sub_sign.copy().min(), 0)
    sub_pos = normalize(sub_pos)
    sub_neg = normalize(sub_neg)
    # cv.imshow('sub_pos',sub_pos)
    # cv.imshow('sub_neg',sub_neg)
    _, obj = cv.threshold(sub_pos, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)
    print("obj", obj.shape)
    print("mask", mask_gripper.shape)
    obj = cv.bitwise_and(obj, obj, mask=mask_gripper)
    _, contours, _ = cv.findContours(obj.copy(), cv.RETR_EXTERNAL,
                                     cv.CHAIN_APPROX_NONE)
    display = cv.cvtColor(sub_neg.copy(), cv.COLOR_GRAY2BGR)
    r = 0
    circle = []
    for cnt in contours:
        area_cnt = cv.contourArea(cnt)
        (x, y), radius = cv.minEnclosingCircle(cnt)
        center = (int(x), int(y))
        radius = radius
        area_cir = math.pi * (radius**2)
        if area_cir <= 0 or area_cnt / area_cir < 0.8:
            continue
        cv.circle(display, center, int(radius), (255, 0, 0), -1)
        circle.append([x, y, radius])
    row, col = gray.shape
    if len(circle) > 0:
        circle = sorted(circle, key=itemgetter(2), reverse=True)
        circle = circle[0]
        x, y, radius = circle
        cv.circle(display, (int(x), int(y)), int(radius), (0, 0, 255), 2)
        diameter = 2. * radius
        pixel_per_cm = diameter / 4.3
        print("radius", radius)
        print("pixel_per_cm", pixel_per_cm)
        print("depth", depth)
        if True:
            x_distance_pixel = row / 2. - y
            y_distance_pixel = col / 2. - x
            print("row col", row, col)
            print("x y", x, y)
            print("x_distance_pixel y_distance_pixel:",
                  x_distance_pixel, y_distance_pixel)
            x_distance_cm = float(x_distance_pixel) / pixel_per_cm
            y_distance_cm = float(y_distance_pixel) / pixel_per_cm
            print("x_distance_cm:", x_distance_cm)
            print("y_distance_cm:", y_distance_cm)
            x_distance_meter = x_distance_cm / 100.
            y_distance_meter = y_distance_cm / 100.
            print("x_meter:", x_distance_meter)
            print("y_meter:", y_distance_meter)
            cv.circle(display, (int(x), int(y)), int(radius), (0, 255, 255), -1)
            # cv.imshow('obj',obj)
            bg = np.uint8(bg)
            fg = np.uint8(fg)
            # cv.imshow('original_bgr', bgr)
            # cv.imshow('bg', bg)
            # cv.imshow('fg', fg)
            # cv.imshow('display', display)
            position.x = x_distance_meter
            position.y = y_distance_meter
            return position
            # k = cv.waitKey(1) & 0xff
    return position
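Since only the largest detected circle is used, the sort-then-take-first step above could also be written with max(). A minimal sketch with made-up (x, y, radius) triples, plus the same pixel-to-centimetre conversion the code bases on an assumed 4.3 cm ball diameter:

from operator import itemgetter

circles = [[120.0, 80.0, 14.5], [60.0, 40.0, 22.0], [200.0, 150.0, 9.0]]

# Equivalent to sorting by radius (index 2) descending and taking the first.
x, y, radius = max(circles, key=itemgetter(2))

diameter = 2.0 * radius
pixel_per_cm = diameter / 4.3          # golf ball is roughly 4.3 cm across
print(x, y, radius, pixel_per_cm)      # 60.0 40.0 22.0 10.23...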
__author__ = 'Hernan Y.Ke'

from operator import itemgetter

items = [{'first': 'Hernan', 'last': 'k'},
         {'first': 'Hernan', 'last': 'w'},
         {'first': 'C', 'last': 'l'},
         {'first': 'Z', 'last': 'k'}]

print(sorted(items, key=itemgetter('last')))
print(sorted(items, key=lambda x: x['last']))  # alternative

print(sorted(items, key=itemgetter('last', 'first')))
print(sorted(items, key=lambda x: (x['last'], x['first'])))  # alternative using a key tuple

# other funcs like min, max take the same key callables
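Following the closing comment, the same key functions work with min() and max(); a short illustration on the items list defined above:

# min/max accept the same key callables as sorted().
print(min(items, key=itemgetter('last')))            # {'first': 'Hernan', 'last': 'k'} (first of the 'k' ties)
print(max(items, key=itemgetter('last', 'first')))   # {'first': 'Hernan', 'last': 'w'}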