def __init__(self, name, schema, columns, inputs=None, definition=None,
             dependent_on=None, dependents=None, comment=None,
             relationtype="unknown", parent_table=None, partition_def=None,
             rowsecurity=False, forcerowsecurity=False):
    self.name = name
    self.schema = schema
    self.inputs = inputs or []
    self.columns = columns
    self.definition = definition
    self.relationtype = relationtype
    self.dependent_on = dependent_on or []
    self.dependents = dependents or []
    self.dependent_on_all = []
    self.dependents_all = []
    self.constraints = od()
    self.indexes = od()
    self.comment = comment
    self.parent_table = parent_table
    self.partition_def = partition_def
    self.rowsecurity = rowsecurity
    self.forcerowsecurity = forcerowsecurity
def combine_basic_match_topic_sensitive_page_rank(basic_best_docs, ts_page_ranks, output, i):
    print >> output, "///////////////////////// Combining Basic Best Match and Topic Sensitive Page Rank /////////////////////////"
    print >> output, "///////////////////////// Iterazione: ", i, " /////////////////////////"
    start_time = timeit.default_timer()
    temp1 = sorted(ts_page_ranks, key=lambda x: ts_page_ranks[x], reverse=True)
    ts_pr = od((x, ts_page_ranks[x]) for x in temp1)
    tmp = dict()
    for doc in basic_best_docs:
        for topic in ts_pr.keys():
            for doc2 in ts_pr[topic]:
                if doc == doc2:
                    if doc not in tmp:
                        tmp[doc] = 0
                    tmp[doc] = ts_pr[topic][doc]
    temp = sorted(tmp, key=lambda x: tmp[x], reverse=True)
    sorted_docs = od((x, tmp[x]) for x in temp)
    combine_basic_match_topic_sensitive_page_rank = timeit.default_timer() - start_time
    print >> output, "Tempo impiegato: ", str(combine_basic_match_topic_sensitive_page_rank)
    print >> output, "Migliori 20 documenti con relativo topic sensitive page rank: "
    print >> output, sorted_docs
def copy_key(redis_client, ledis_client, key, convert=False):
    global entries
    k_type = redis_client.type(key)
    if k_type == "string":
        value = redis_client.get(key)
        ledis_client.set(key, value)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1
    elif k_type == "list":
        _list = redis_client.lrange(key, 0, -1)
        for value in _list:
            ledis_client.rpush(key, value)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1
    elif k_type == "hash":
        mapping = od(redis_client.hgetall(key))
        ledis_client.hmset(key, mapping)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1
    elif k_type == "zset":
        out = redis_client.zrange(key, 0, -1, withscores=True)
        pieces = od()
        for i in od(out).iteritems():
            pieces[i[0]] = int(i[1])
        ledis_client.zadd(key, **pieces)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1
    else:
        print "KEY %s of TYPE %s is not supported by LedisDB." % (key, k_type)
def test_etree(self):
    'Parker conversion from data to etree'
    eq = self.check_etree(xmljson.parker)
    # From https://developer.mozilla.org/en-US/docs/JXON#In_summary
    eq({'animal': {}}, '<animal/>')
    eq({'animal': 'Deka'}, '<animal>Deka</animal>')
    eq({'animal': 1}, '<animal>1</animal>')
    eq({'animal': od([('dog', 'Charlie'), ('cat', 'Deka')])},
       '<animal><dog>Charlie</dog><cat>Deka</cat></animal>')
    eq({'animal': {'dog': ['Charlie', 'Mad Max']}},
       '<animal><dog>Charlie</dog><dog>Mad Max</dog></animal>')
    # Test edge cases
    eq('x', '<x/>')        # Strings become elements
    eq({})                 # Empty objects become empty nodes
    eq(od([                # Multiple keys become multiple nodes
        ('x', 'a'),
        ('y', 'b')
    ]), '<x>a</x>', '<y>b</y>')
    with self.assertRaises(Exception):
        eq({'x': {'@x': 1}}, '<x x="1"/>')
    # Nested elements
    eq({'alice': od([
        ('bob', {'charlie': {}}),
        ('david', {'edgar': {}})])},
       '<alice><bob><charlie/></bob><david><edgar/></david></alice>')
    # Multiple elements at the same level become array elements.
    eq({'alice': {'bob': [{'charlie': {}}, {'david': {}}]}},
       '<alice><bob><charlie/></bob><bob><david/></bob></alice>')
def create_epdict(ep_dict, url):
    try:
        scraper = cfscrape.create_scraper()
        content = scraper.get(url).content
        soup = bs(content)
        epdict = {}
        eplist = []
        titlelist = []
        if "Drama" in url:
            for link in soup.find_all('a'):
                if 'href' in str(link):
                    try:
                        if "Episode-" in link['href']:
                            possible = link['href'].split("Episode-")[1]
                            possible = "/Episode-" + possible
                            fullurl = url + possible
                            fulltitle = link['title'].split("Episode ")[1][:9]
                            integers = [str(i) for i in range(0, 10)]
                            if possible:
                                episode = possible[:7]
                            if fulltitle[0] in integers and fulltitle[1] in integers and fulltitle[2] in integers:
                                title = fulltitle[:3]
                            elif fulltitle[0] in integers and fulltitle[1] in integers:
                                title = fulltitle[:2]
                            else:
                                title = fulltitle[0]
                            eplist.append(fullurl.encode('ascii'))
                            titlelist.append(title.encode('ascii'))
                    except TypeError:
                        pass
            epdict = od((zip(titlelist[::-1][0:], eplist[::-1][0:])))
            return epdict
        else:
            for link in soup.find_all('a'):
                try:
                    if "Episode-" in link['href']:
                        possible = link['href'].split("Episode-")[1]
                        possible = "/Episode-" + possible
                        fullurl = url + possible
                        fulltitle = link['title'].split("Episode ")[1][:9]
                        integers = [str(i) for i in range(0, 10)]
                        if fulltitle[4] in integers and fulltitle[5] in integers and fulltitle[6] in integers:
                            title = fulltitle[:7]
                        elif fulltitle[3:6] == " - " and fulltitle[6::9] in integers and fulltitle[7::9] in integers and fulltitle[8::9] in integers:
                            title = fulltitle[:9]
                        elif 'v' in fulltitle[:4].lower():
                            title = fulltitle[0:5]
                        else:
                            title = fulltitle[:3]
                        eplist.append(fullurl.encode('ascii'))
                        titlelist.append(title.encode('ascii'))
                except TypeError:
                    pass
            epdict = od((zip(sorted(titlelist), sorted(eplist))))
            return epdict
    except:
        pass
def make_ped_file(snp_list, annot_file, fam_file, conf_file, out_pre):
    """
    The PED file is a white-space (space or tab) delimited file.
    The first six columns are mandatory:

        Family ID
        Individual ID
        Paternal ID
        Maternal ID
        Sex (1=male; 2=female; other=unknown)
        Phenotype

    Genotypes (column 7 onwards) should also be white-space delimited;
    they can be any character (e.g. 1,2,3,4 or A,C,G,T or anything else)
    except 0, which is, by default, the missing genotype character.
    All markers should be biallelic.
    """
    good_snps = set([x.rstrip() for x in snp_list])
    annots = od()
    annot_reader = csv.reader(annot_file, delimiter=",", quotechar='"')
    for row in annot_reader:
        if row[0].startswith('AX-') and row[0] in good_snps:
            # Probe Set ID: [Chromosome, Physical Position, Allele A, Allele B]
            kept_annots = [row[x] for x in [3, 4, 9, 10]]
            kept_annots[0] = kept_annots[0].split('.')[1]
            annots[row[0]] = kept_annots
    fam_dict = od()
    for l in fam_file:
        tmp = l.rstrip().split()
        fam_dict[tmp[1]] = tmp
    kept_samples = []
    sample_names = []
    gts = {}
    gt_reader = csv.reader(conf_file, delimiter="\t")
    for row in gt_reader:
        if not row[0].startswith('#'):
            if row[0] == "Probe Set ID" or row[0] == 'probeset_id':
                for i, ind_tmp in enumerate(row[1:]):
                    ind = re.sub('(\.AxiomGT1\.chp Call Codes)|(\.CEL)', '', ind_tmp)
                    if ind in fam_dict:
                        kept_samples.append(i + 1)
                        sample_names.append(ind)
            else:
                if row[0] in annots:
                    gts[row[0]] = [row[x] for x in kept_samples]
    with open('{}.conf'.format(out_pre), 'w') as ped_file:
        for snp_id in annots:
            # transpose genotype matrix
            for ind, gt in zip(sample_names, gts[snp_id]):
                fam_dict[ind].append(gt)
        for ind in fam_dict:
            ped_file.write('\t'.join(fam_dict[ind]) + '\n')
def differences(a, b, add_dependencies_for_modifications=True):
    a_keys = set(a.keys())
    b_keys = set(b.keys())
    keys_added = set(b_keys) - set(a_keys)
    keys_removed = set(a_keys) - set(b_keys)
    keys_common = set(a_keys) & set(b_keys)
    added = od((k, b[k]) for k in sorted(keys_added))
    removed = od((k, a[k]) for k in sorted(keys_removed))
    modified = od((k, b[k]) for k in sorted(keys_common) if a[k] != b[k])
    unmodified = od((k, b[k]) for k in sorted(keys_common) if a[k] == b[k])
    return added, removed, modified, unmodified
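# Minimal usage sketch for differences() above (not part of the original source);
# it assumes od is collections.OrderedDict and uses hypothetical sample dicts.
from collections import OrderedDict as od

a = {"t1": "int", "t2": "text", "t3": "bool"}
b = {"t2": "text", "t3": "bigint", "t4": "date"}
added, removed, modified, unmodified = differences(a, b)
# added      -> OrderedDict([('t4', 'date')])
# removed    -> OrderedDict([('t1', 'int')])
# modified   -> OrderedDict([('t3', 'bigint')])
# unmodified -> OrderedDict([('t2', 'text')])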
def test_etree(self, converter=None):
    'BadgerFish conversion from data to etree'
    eq = self.check_etree(converter or xmljson.badgerfish)
    # From https://developer.mozilla.org/en-US/docs/JXON#In_summary
    eq({'animal': {}}, '<animal/>')
    eq({'animal': 'Deka'}, '<animal>Deka</animal>')
    eq({'animal': 1}, '<animal>1</animal>')
    eq({'animal': {'@name': 1}}, '<animal name="1"/>')
    eq({'animal': {'@name': 'Deka', '$': 'is my cat'}},
       '<animal name="Deka">is my cat</animal>')
    eq({'animal': od([('dog', 'Charlie'), ('cat', 'Deka')])},
       '<animal><dog>Charlie</dog><cat>Deka</cat></animal>')
    eq({'animal': {'dog': ['Charlie', 'Mad Max']}},
       '<animal><dog>Charlie</dog><dog>Mad Max</dog></animal>')
    eq({'animal': {'$': ' in my house ', 'dog': 'Charlie'}},
       '<animal> in my house <dog>Charlie</dog></animal>')
    # TODO: handling split text
    # eq({'animal': {'$': ' in my house', 'dog': 'Charlie'}},
    #    '<animal> in my <dog>Charlie</dog> house</animal>')
    # Test edge cases
    eq('x', '<x/>')        # Strings become elements
    eq({})                 # Empty objects become empty nodes
    eq(od([                # Multiple keys become multiple nodes
        ('x', {'@x': 1}),
        ('y', 'z')
    ]), '<x x="1"/>', '<y>z</y>')
    # Attributes
    eq({'p': {'@id': 1, '$': 'text'}}, '<p id="1">text</p>')
    eq({'div': {'@id': 2, '$': 'parent-text', 'p': {'$': 'text'}}},
       '<div id="2">parent-text<p>text</p></div>')
    # From http://www.sklar.com/badgerfish/
    # Text content of elements goes in the $ property of an object.
    eq({'alice': {'$': 'bob'}}, '<alice>bob</alice>')
    # Nested elements become nested properties
    eq({'alice': od([
        ('bob', {'$': 'charlie'}),
        ('david', {'$': 'edgar'})])},
       '<alice><bob>charlie</bob><david>edgar</david></alice>')
    # Multiple elements at the same level become array elements.
    eq({'alice': {'bob': [{'$': 'charlie'}]}},
       '<alice><bob>charlie</bob></alice>')
    eq({'alice': {'bob': [{'$': 'charlie'}, {'$': 'david'}]}},
       '<alice><bob>charlie</bob><bob>david</bob></alice>')
    # Attributes go in properties whose names begin with @.
    eq({'alice': {'$': 'bob', '@charlie': 'david'}},
       '<alice charlie="david">bob</alice>')
def test_etree(self):
    'GData conversion from data to etree'
    eq = self.check_etree(xmljson.gdata)
    # From https://developer.mozilla.org/en-US/docs/JXON#In_summary
    eq({'animal': {}}, '<animal/>')
    eq({'animal': 'Deka'}, '<animal>Deka</animal>')
    eq({'animal': 1}, '<animal>1</animal>')
    eq({'animal': {'name': 1}}, '<animal name="1"/>')
    eq({'animal': {'$t': 'is my cat'}}, '<animal>is my cat</animal>')
    eq({'animal': od([('dog', {'$t': 'Charlie'}), ('cat', {'$t': 'Deka'})])},
       '<animal><dog>Charlie</dog><cat>Deka</cat></animal>')
    eq({'animal': od([('dog', 'Charlie'), ('cat', 'Deka')])},
       '<animal dog="Charlie" cat="Deka"/>')
    eq({'animal': {'dog': ['Charlie', 'Mad Max']}},
       '<animal><dog>Charlie</dog><dog>Mad Max</dog></animal>')
    eq({'animal': {'$t': ' in my house ', 'dog': {'$t': 'Charlie'}}},
       '<animal> in my house <dog>Charlie</dog></animal>')
    eq({'animal': {'$t': ' in my house ', 'dog': 'Charlie'}},
       '<animal dog="Charlie"> in my house </animal>')
    # Test edge cases
    eq('x', '<x/>')        # Strings become elements
    eq({})                 # Empty objects become empty nodes
    eq(od([                # Multiple keys become multiple nodes
        ('x', {}),
        ('y', 'z')
    ]), '<x/>', '<y>z</y>')
    # Attributes
    eq({'p': {'$t': 'text'}}, '<p>text</p>')
    eq({'div': {'$t': 'parent-text', 'p': {'$t': 'text'}}},
       '<div>parent-text<p>text</p></div>')
    # Text content of elements goes in the $ property of an object.
    eq({'alice': {'$t': 'bob'}}, '<alice>bob</alice>')
    # Nested elements become nested properties
    eq({'alice': od([
        ('bob', {'$t': 'charlie'}),
        ('david', {'$t': 'edgar'})])},
       '<alice><bob>charlie</bob><david>edgar</david></alice>')
    # Multiple elements at the same level become array elements.
    eq({'alice': {'bob': [{'$t': 'charlie'}]}},
       '<alice><bob>charlie</bob></alice>')
    eq({'alice': {'bob': [{'$t': 'charlie'}, {'$t': 'david'}]}},
       '<alice><bob>charlie</bob><bob>david</bob></alice>')
    # Attributes go in properties whose names begin with @.
    eq({'alice': {'$t': 'bob'}}, '<alice>bob</alice>')
def planes_of_expression(self):
    if self._eplane is None:
        c1 = Counter([planes_of_expression[c][0] for c in self.full_name if onlyltrs(c)])
        c2 = Counter([planes_of_expression[c][1] for c in self.full_name if onlyltrs(c)])
        self._eplane3 = od([("physical", 0), ("emotional", 0), ("mental", 0), ("intuitive", 0)])
        self._eplane4 = od([("creative", 0), ("vacillating", 0), ("grounded", 0)])
        for c in filter(onlyltrs, self.full_name):
            k, k2 = planes_of_expression[c]
            self._eplane3[k] = sum_digits(self.l2nmap[c] + self._eplane3[k])
            self._eplane4[k2] = sum_digits(self.l2nmap[c] + self._eplane4[k2])
        self._eplane, self._eplane2 = c1.most_common()[0][0], c2.most_common()[0][0]
    return self._eplane, self._eplane2, self._eplane3, self._eplane4
def plot_tags(tags_dic, save_fname):
    """
    Create and save plots for 'Graphs' option.
    These plot files shall be grabbed and included into UI.

    Args:
        | *tags_dic* (dict) -- dictionary of POS-tag occurrences
        | *save_fname* (str) -- currently processed file name without extension

    Returns:
        *odd* (OrderedDict) -- frequency sorted POS-tags
    """
    matplotlib.rc('font', **{'size': 13})
    # create POS-tags distribution plot
    odd = od(sorted([(k, v) for k, v in tags_dic.items()], key=lambda x: x[1]))
    bars = plt.barh(range(len(odd)), odd.values(), align='center')
    plt.title('Part-of-speech tags statistics')
    plt.yticks(range(len(odd)), odd.keys())
    plt.xlabel('Occurrence')
    plt.ylabel('POS-tags')
    plt.grid(True)
    plt.margins(y=0)
    random.shuffle(COLLECTION)
    for i in range(len(tags_dic)):
        bars[i].set_color(COLLECTION[i])
    plt.savefig(os.path.join('_graphs', save_fname + '.png'))
    # create functional / non-functional words pie chart
    plt.clf()
    matplotlib.rc('font', **{'size': 16})
    functional = ('DT', 'PDT', 'PRP', 'PRP$', 'IN', 'CC', 'UH', 'RP', 'WRB',
                  'WP$', 'WDT', 'WP', 'EX', 'MD', 'TO')
    content = ('JJ', 'JJR', 'JJS', 'NN', 'NNS', 'NNP', 'NNPS', 'RB', 'RBR',
               'RBS', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ')
    fwords = sum([tags_dic[k] for k in tags_dic if k in functional])
    cwords = sum([tags_dic[k] for k in tags_dic if k in content])
    try:
        fratio = round(fwords / (fwords + cwords) * 100, 1)
        cratio = round(cwords / (fwords + cwords) * 100, 1)
    except ZeroDivisionError:
        fratio = 0.0
        cratio = 0.0
    labels = ['functional', 'content']
    sizes = [fratio, cratio]
    pie_colors = ['salmon', 'royalblue']
    plt.pie(sizes, labels=labels, colors=pie_colors, autopct='%1.1f%%',
            shadow=True, startangle=90)
    # Set aspect ratio to be equal so that pie is drawn as a circle.
    plt.axis('equal')
    # increasing fonts in a pie chart
    plt.savefig(os.path.join('_graphs', save_fname + '_pie.png'))
    plt.clf()
    return od(reversed(list(odd.items())))
def main(args):
    paralogs = ["AB", "A", "B", "C", "D", "N"]
    start = 120535758
    stop = 120598753
    args = parse_args(args)
    wl = parse_whitelist(args.whitelist)
    outf = open(args.tsv, "w")
    samples = sorted(os.listdir(args.folder))
    sample_paths = [os.path.join(args.folder, x) for x in samples]
    names = [os.path.basename(x) for x in samples]
    final = od()
    for sample, n in izip(sample_paths, names):
        combined = od()
        vcfs = [os.path.join(sample, x) for x in ["AB.vcf", "A.vcf", "B.vcf", "C.vcf", "D.vcf", "N.vcf"]]
        for v, p in izip(vcfs, paralogs):
            combined[p] = make_R(v, wl[p], start, stop)
        final[n] = combined
    for sample, paralog_dict in final.iteritems():
        for paralog, csv in paralog_dict.iteritems():
            outf.write("\t".join(map(str, [sample + "_" + paralog] + csv)))
            outf.write("\n")
    outf.close()
    call = ["Rscript", "/cluster/home/ifiddes/code/n2nl_barplots.R", args.tsv, args.graph + "_full.png"]
    p1 = subprocess.Popen(call)
    outf = open(args.summary, "w")
    rearranged = od()
    for sample, paralog_dict in final.iteritems():
        for paralog, csv in paralog_dict.iteritems():
            if paralog not in rearranged:
                rearranged[paralog] = list()
            rearranged[paralog].append(csv)
    for paralog, csvs in rearranged.iteritems():
        outf.write("\t".join(map(str, ["median", paralog] + median(csvs))))
        outf.write("\n")
        outf.write("\t".join(map(str, ["mean", paralog] + mean(csvs))))
        outf.write("\n")
        outf.write("\t".join(map(str, ["var", paralog] + var(csvs))))
        outf.write("\n")
    outf.close()
    call = ["Rscript", "/cluster/home/ifiddes/code/n2nl_summary_barplots.R", args.summary, args.graph + "_summary.png"]
    p2 = subprocess.Popen(call)
def opt_best_match(inverted_db, query, threshold):
    sorted_db = dict()
    query_db = dict()
    for q in query:
        for term in q.split():
            if term in inverted_db:
                if term not in query_db:
                    query_db[term] = dict()
                query_db[term] = inverted_db[term]
    temp = sorted(query_db, key=lambda x: len(query_db[x]), reverse=True)
    sorted_db = od((x, query_db[x]) for x in temp)
    K = countK()
    best_docs = dict()
    best_docs2 = dict()
    count = 0
    for parola in sorted_db:
        for doc in sorted_db[parola]:
            if count < K:
                if doc not in best_docs:
                    best_docs[doc] = 0
                    count += 1
                best_docs[doc] += sorted_db[parola][doc]
            else:
                if doc in best_docs:
                    best_docs[doc] += sorted_db[parola][doc]
    # sorting
    temp = sorted(best_docs, key=lambda x: best_docs[x], reverse=True)
    sorted_docs = od((x, best_docs[x]) for x in temp)
    count = 0
    for doc in sorted_docs:
        if count < 20:
            count += 1
            if doc not in best_docs2:
                best_docs2[doc] = 0
            best_docs2[doc] = sorted_docs[doc]
    temp2 = sorted(best_docs2, key=lambda x: best_docs2[x], reverse=True)
    best_docs = od((x, best_docs[x]) for x in temp2)
    return best_docs
def sort_tags(source):
    """ return a sorted version of source, with the biggest tags first """
    output = od(sorted(Counter([e for e in source]).items(), key=lambda t: -t[1]))
    return output
def fill_mcmc_parameters(self):
    """
    Initializes the ordered dictionary :attr:`mcmc_parameters` from
    the input parameter file.

    It uses :meth:`read_file`, and initializes instances of
    :class:`parameter` to actually fill in :attr:`mcmc_parameters`.
    """
    # Define temporary quantities, only to simplify the input in the
    # parameter file
    self.parameters = od()
    # Read from the parameter file everything
    try:
        self.param_file = open(self.param, 'r')
    except IOError:
        raise io_mp.ConfigurationError(
            "Error in initializing the Data class, the parameter file " +
            "{0} does not point to a proper file".format(self.param))
    # In case the parameter file is a log.param, scan first once the file
    # to extract the path dictionary.
    if self.param.find('log.param') != -1:
        self.read_file(self.param_file, search_path=True)
    self.read_file(self.param_file)
    for key, value in self.parameters.iteritems():
        self.mcmc_parameters[key] = Parameter(value, key)
def test_query(self):
    from collections import OrderedDict as od
    cases = [
        ('', ''),
        ('', []),
        ('', {}),
        ('?name', 'name'),
        ('?name', [('name', None)]),
        ('?name', {'name': None}),
        ('?name=foo', 'name=foo'),
        ('?name=foo', [('name', 'foo')]),
        ('?name=foo', {'name': 'foo'}),
        ('?name=foo', {'name': ['foo']}),
        ('?name=42', [('name', 42)]),
        ('?name=42', {'name': 42}),
        ('?name=42', {'name': [42]}),
        ('?name=foo&type=bar', [('name', 'foo'), ('type', 'bar')]),
        ('?name=foo&type=bar', od([('name', 'foo'), ('type', 'bar')])),
        ('?name=foo&name=bar', [('name', 'foo'), ('name', 'bar')]),
        ('?name=foo&name=bar', {'name': ['foo', 'bar']}),
        ('?name=a%2Fb%2Fc', dict(name='a/b/c')),
        ('?name=a%3Ab%3Ac', dict(name='a:b:c')),
        ('?name=a%3Fb%3Fc', dict(name='a?b?c')),
        ('?name=a%40b%40c', dict(name='a@b@c')),
        ('?name=a%23b%23c', dict(name='a#b#c')),
        ('?name=a%26b%26c', dict(name='a&b&c')),
        ('?name=a%3Bb%3Bc', dict(name='a;b;c')),
    ]
    for uri, query in cases:
        self.check(uri, query=query)
    # invalid query type
    for query in (0, [1]):
        with self.assertRaises(TypeError, msg='query=%r' % query):
            uricompose(query=query)
def templateInSubpage(sourcePrefix, destPrefix):
    generator = pagegenerators.PrefixingPageGenerator(prefix=sourcePrefix)
    for page in generator:
        if page.isRedirectPage():
            continue
        suffix = page.title().split('/')[1]
        dest = pywikibot.Page(pywikibot.getSite(), title=destPrefix + suffix)
        print dest
        if dest.exists():
            pywikibot.output(u"Page %s already exists" % dest.title())
            #page.put(u"#redirect [[%s]]" % dest.title(), comment="Redirectez către noua locatie a graficelor cu date demografice")
            continue
        try:
            text = page.get()
        except:
            pywikibot.output(u"Could not read %s" % page.title())
            continue
        output = od({})
        tpl = sf.tl2Dict(sf.extractTemplate(text, u"Demografia"))[0]
        for i in range(1, 20):
            p1 = u"a" + str(i)
            p2 = u"p" + str(i)
            if p1 in tpl and p2 in tpl:
                output[i] = {"year": tpl[p1], "pop": tpl[p2]}
        text = u"an,populatie\n"
        for elem in output:
            text += "%s,%s\n" % (output[elem]["year"], output[elem]["pop"])
        print text
        dest.put(text, comment="Creez o nouă pagină cu date demografice")
def __parse_log(ins, pat_time):
    result = od()
    suite = None
    while True:
        try:
            line = ins.next()
            m = _pat_suite.match(line)
            if m:
                suite = m.groups()[0]
                result[suite] = {}
                #print('suite:', suite)
                continue
            m = _pat_case.match(line)
            if m:
                case = m.groups()[0]
                #print('case:', case, end='\t')
                while True:
                    line = ins.next()
                    m = _pat_sep.match(line)
                    if m:
                        break
                    m = pat_time.match(line)
                    if m:
                        result[suite][case] = float(m.groups()[0])
                        #print('time:', m.groups()[0])
                        break
        except StopIteration:
            break
    return result
def combine_basic_match_page_rank(basic_best_docs, page_ranks, output, i):
    print >> output, "///////////////////////// Combining Basic Best Match and Page Rank /////////////////////////"
    print >> output, "///////////////////////// Iterazione: ", i, " /////////////////////////"
    start_time = timeit.default_timer()
    tmp = dict()
    for doc in basic_best_docs:
        for doc2 in page_ranks:
            if doc == doc2:
                if doc not in tmp:
                    tmp[doc] = 0
                tmp[doc] = page_ranks[doc]
    temp = sorted(tmp, key=lambda x: tmp[x], reverse=True)
    sorted_docs = od((x, tmp[x]) for x in temp)
    combine_basic_match_page_rank_elapsed = timeit.default_timer() - start_time
    print >> output, "Tempo impiegato: ", str(combine_basic_match_page_rank_elapsed)
    print >> output, "Migliori 20 documenti con relativo page rank: "
    for doc in sorted_docs:
        print >> output, doc, str(sorted_docs[doc])
def process_text(*args):
    """
    Process loaded text with textblob toolkit.
    Calculate text statistics.

    Args:
        *args* (list) -- PriorityQueue and raw text data

    Returns:
        | *parsed_text* (Blobber) -- Blobber obj which contains parse results
        | *full_tagged_sents* (dict) -- dict of
          *{sent num: {word num: (word, POS-tag)}}*
    """
    # TextBlob runs POS-tagging
    model_queue, text = args
    parsed_text = TextBlob(text)
    # POS-tagging with nltk again because TextBlob sent.tags is too slow
    tagger = PerceptronTagger()
    tagset = None
    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
    sents_tokenized = sent_detector.tokenize(text)
    tokenized = []
    for sent in sents_tokenized:
        tokenized.append(nltk.tokenize.word_tokenize(sent, language='english'))
    pos_sents = od()
    for i, sent_toks in enumerate(tokenized):
        pos_text = nltk.tag._pos_tag(sent_toks, None, tagger)
        joined_tags = [(pos[0], 'PUNC' if pos[1] not in NLTK_PENN else pos[1], n)
                       for n, pos in enumerate(pos_text)]
        pos_sents[i] = joined_tags
    model_queue.put([parsed_text, pos_sents])
def fill_mcmc_parameters(self):
    """
    Initializes the ordered dictionary :attr:`mcmc_parameters` from
    the input parameter file.

    It uses :meth:`read_file`, and calls
    :meth:`from_input_to_mcmc_parameters` to actually fill in
    :attr:`mcmc_parameters`.
    """
    # Define temporary quantities, only to simplify the input in the
    # parameter file
    self.parameters = od()
    # Read from the parameter file everything
    try:
        self.param_file = open(self.param, 'r')
    except IOError:
        io_mp.message(
            "Error in initializing the data class, the parameter file "
            "{0} does not point to a proper file".format(self.param),
            "error")
    self.read_file(self.param_file)
    # Transform from parameters dictionary to mcmc_parameters dictionary
    # of dictionaries, method defined just below
    self.from_input_to_mcmc_parameters(self.parameters)
def __init__(self, name, date, project, description=None):
    self.name = name
    self.project = project
    self.date = date
    self.description = description if description is not None else ""
    ana_name = self.date + "_" + self.name
    self.ana_dir = jn(project.path, "analyses", ana_name)
    logging.info("Analysis {} instantiated.".format(ana_name))
    assert os.path.isdir(self.ana_dir), ("{} does not exist, create "
                                         "it by calling create_new_analysis()".format(self.ana_dir))
    if self.description:
        with open(jn(self.ana_dir, "README"), 'w') as f:
            f.write(self.date + '_' + self.name + '\n')
            try:
                f.write("Analysis notebook can be found in: " +
                        callingframe.f_locals["_dh"])
            except:
                pass
            f.write('-' * 60 + '\n')
            f.write(description + '\n')
            f.write('-' * 60 + '\n')
            f.write('Data produced by this analysis can be found in ./_data\n')
            f.write("See ./log and ./jobscripts for more information\n")
            f.write("In ./log each analysis step is documented in a .README file\n")
    # TODO: We should define a workflow chain and produce a log file for this.
    try:
        with open(jn(self.ana_dir, "log", "steps.json")):
            pass
    except:
        pass
    self.steps = od()
def proxy_repr(self, pprint=False, outfile=True, json_path='./jsons/proxies'):
    """
    proxy_dict is an OrderedDict
    """
    proxy_dict = od()
    proxy_dict['sitename'] = self.sitename
    proxy_dict['coords'] = self.coords
    proxy_dict['season'] = self.season
    proxy_dict['dataset'] = self.dataset
    proxy_dict['variable'] = self.variable
    proxy_dict['calc_anoms'] = self.calc_anoms
    proxy_dict['detrend'] = self.detrend
    proxy_dict['value'] = self.value
    proxy_dict['climatology'] = self.climatology
    proxy_dict['period'] = self.period
    proxy_dict['extracted_coords'] = self.extracted_coords.tolist()
    proxy_dict['distance_point'] = self.distance_point
    proxy_dict['trend_params'] = self.trend_params
    proxy_dict['category'] = self.category
    proxy_dict['analog_years'] = self.analog_years.tolist()
    if pprint:
        pprint_od(proxy_dict)
    if outfile:
        proxy_name = self.sitename.replace(" ", "_")
        proxy_name = proxy_name.replace(".", "")
        #proxy_name =
        fname = "{}.json".format(self.sitename.replace(" ", "_"))
        with open(os.path.join(json_path, fname), 'w') as f:
            json.dump(proxy_dict, f)
    self.proxy_dict = proxy_dict
def get_ngrams(txtblob_obj):
    """
    Calculate word and ngram counts for Graphs option.

    Calculate top n frequent words.
    Calculate top n 2-grams
    Calculate top n 3-grams

    Args:
        *txtblob_obj* (Blob) -- object containing parse results

    Returns:
        | *mostn* (list) -- a list of n most frequent words
        | *ngram2* (list) -- a list of n most frequent 2-grams
        | *ngram3* (list) -- a list of n most frequent 3-grams
    """
    counter = Counter(txtblob_obj[0].words)
    counts_dic = od(counter.most_common())
    tags_dic = dict(txtblob_obj[0].tags)
    # POS-tags included into most frequent words list
    include = ('JJ', 'JJR', 'JJS', 'NN', 'NNS', 'NNP', 'NNPS', 'VB', 'VBG')
    # get n most frequent words
    mostn = [(k, counts_dic[k]) for k in counts_dic if tags_dic.get(k) in include][:10]
    ngram2_cnt = Counter([(n[0], n[1]) for n in txtblob_obj[0].ngrams(2)])
    ngram3_cnt = Counter([(n[0], n[1], n[2]) for n in txtblob_obj[0].ngrams(3)])
    ngram2 = [(n[0], ngram2_cnt[n[0]]) for n in ngram2_cnt.most_common(10)]
    ngram3 = [(n[0], ngram3_cnt[n[0]]) for n in ngram3_cnt.most_common(10)]
    return mostn, ngram2, ngram3
def parse_enhancements(params):
    '''Parse image enhancements and their parameters, if any.

    :param params: list of enhancement names and parameters
    :type params: list of strings
    :rtype: ordered dictionary

    example of *params*: ['usm:20,8', 'br']
    '''
    output = od()
    if len(params) > 0:
        LOGGER.debug("Parsing enhancements.")
        for param in params:
            param = param.split(":")
            if len(param) > 1:
                name, args = param
                args = args.split(',')
                arg_list = []
                for arg in args:
                    try:
                        arg = float(arg)
                    except ValueError:
                        pass
                    arg_list.append(arg)
                output[name] = arg_list
            else:
                output[param[0]] = None
    return output
def main(args):
    logger.info('loading pickle: %s' % args.pickle)
    uuids = od()
    with open(args.uuids) as uuid_in:
        for line in uuid_in:
            uuid = line.strip().split()[0]
            uuids[uuid] = True
    insertions = []
    with open(args.pickle, 'r') as pickin:
        insertions = pickle.load(pickin)
    logger.info('finished loading %s' % args.pickle)
    logger.info('raw candidate count: %d' % len(insertions))
    filtered = []
    for ins in insertions:
        if ins['INFO']['ins_uuid'] in uuids:
            filtered.append(ins)
    logger.info('kept %d records' % len(filtered))
    with open(args.out, 'w') as pickout:
        pickle.dump(filtered, pickout)
def parse(fname):
    d = od()
    for line in file(fname):
        line = "".join([i if ord(i) < 128 else " " for i in line])
        toks = line.split()
        num, category, operator = toks[0], toks[1], " ".join(toks[2:])
        d[(category, operator)] = num
    return d
def parse_new(soup, filename):
    """
    parse a 2016 era webpage read from file filename

    Parameters
    ----------
    soup: a beautiful soup object
    filename: the original html filename (for the metadata)

    Returns
    -------
    keep_dict: an ordered dictionary with metadata and temperature
               forecasts for the week
    """
    keep_dict = od()
    keep_dict['filename'] = filename
    #
    # get the metadata from the right column of the "about_forecast" table
    #
    info = soup.findAll("div", {"id": "about_forecast"})
    lines = list(info[0].children)
    location = lines[1].findAll('div', {"class": "right"})
    placename, latlon = list(location[0].children)
    keep_dict['placename'] = placename.strip()
    latlon = latlon.text.strip()
    #
    # change unicode degree sign to "deg"
    #
    latlon = latlon.replace('\u00b0', ' deg ')
    keep_dict['location'] = latlon
    update = lines[3].findAll('div', {"class": "right"})
    keep_dict['last_update'] = update[0].text.strip()
    valid = lines[5].findAll('div', {"class": "right"})
    keep_dict['valid'] = valid[0].text.strip()
    #
    # get the days and temps from two columns of the forecast table
    #
    forecast_times = soup.findAll('div', {"class": "col-sm-2 forecast-label"})
    forecast_temps = soup.findAll('div', {"class": "col-sm-10 forecast-text"})
    keep_dict['temps'] = od()
    for the_time, the_temp in zip(forecast_times, forecast_temps):
        match = find_temp.match(the_temp.text)
        keep_dict['temps'][the_time.b.text] = int(match.group(1))
    return keep_dict
def load_mappings(self):
    self.mappings = od()
    mappings_path = QtCore.QDir("mappings:/")
    for i in mappings_path.entryList():
        if i in (".", ".."):
            continue
        mapping = mappings_path.absoluteFilePath(i)
        self.mappings[os.path.splitext(i)[0].title()] = LetterMapping(mapping)
def od(self):
    """ View Ami event as ordered dict (most useful). """
    event = self._event
    if hasattr(self, "_extra"):
        event += self._extra
    return od(line.t for line in event)
def load_all(self):
    self.load_schemas()
    self.load_all_relations()
    self.load_functions()
    self.selectables = od()
    self.selectables.update(self.relations)
    self.selectables.update(self.functions)
    self.load_deps()
    self.load_deps_all()
def __init__(self, sequence='', name='DNA', validate_sequence=False):
    self.parial_dict = {
        't7': (r_compile('\w*?' + self.deg_replace('TAATACGACTCACTATAGN'), flags=ig), 'NN'),
        'sp6': (r_compile('\w*?' + self.deg_replace('ATTTAGGTGACACTATAGN'), flags=ig), 'N'),
        't3': (r_compile('\w*?' + self.deg_replace('AATTAACCCTCACTAAAGN'), flags=ig), 'N'),
        'sgRNA_overlap': (r_compile(self.deg_replace('GTTTTAGAGCTAGAAN') + '*', flags=ig), ''),
        'attB1R_primer': (r_compile('\w*?' + self.deg_replace('ACTGCTTTTTTGTACAAACTTG'), flags=ig), ''),
        'attB1_primer': (r_compile('\w*?' + self.deg_replace('ACAAGTTTGTACAAAAAAGCAGGCT'), flags=ig), ''),
        'attB2R_primer': (r_compile('\w*?' + self.deg_replace('ACCACTTTGTACAAGAAAGCTGGGT'), flags=ig), ''),
        'attB2_primer': (r_compile('\w*?' + self.deg_replace('ACAGCTTTCTTGTACAAAGTGG'), flags=ig), ''),
        'attB3R_primer': (r_compile('\w*?' + self.deg_replace('ACAACTTTGTATAATAAAGTTG'), flags=ig), ''),
        'attB4_primer': (r_compile('\w*?' + self.deg_replace('ACAACTTTGTATAGAAAAGTTG'), flags=ig), ''),
    }
    self.sequence = r_sub('[\W]', '', sequence)
    self.valid_characters = {'A', 'T', 'C', 'G', 'N', 'H', 'D', 'V', 'B', 'K', 'M', 'Y', 'R', 'W', 'S'}
    if validate_sequence and not self.validate_sequence(self.sequence):
        raise self.SequenceError("Sequence contains invalid characters")
    self.name = name
    self.file_name = None
    self._save_path = plp.cwd()
    self.features = od()
    self.comments = []
    self.length = len(self.sequence)
    self._ape_header1 = 'LOCUS\nACCESSION\nVERSION\n'
    self._ape_header2 = 'COMMENT ApEinfo:methylated:1\nFEATURES{}Location/Qualifiers\n'.format(' ' * 13)
    self._ape_feature = ' {7}{5}{0[0]}..{0[1]}{6}\n{4}/label={1}\n{4}/ApEinfo_fwdcolor="{2}"\n{4}/ApEinfo_revcolor="{3}"\n{4}/ApEinfo_graphicformat="arrow_data {{{{0 1 2 0 0 -1}} {{}} 0}}\n{4}width 5 offset 0"\n'
    self.color_dict = od([
        ('Cayenne', '#800000'), ('Asparagus', '#808000'), ('Clover', '#008000'),
        ('Teal', '#008080'), ('Midnight', '#000080'), ('Plum', '#800080'),
        ('Tin', '#7F7F7F'), ('Nickel', '#808080'), ('Mocha', '#804000'),
        ('Fern', '#408000'), ('Moss', '#008040'), ('Ocean', '#004080'),
        ('Eggplant', '#400080'), ('Maroon', '#800040'), ('Steel', '#666666'),
        ('Aluminum', '#999999'), ('Marascino', '#FF0000'), ('Lemon', '#FFFF00'),
        ('Spring', '#00FF00'), ('Turquoise', '#00FFFF'), ('Blueberry', '#0000FF'),
        ('Magenta', '#FF00FF'), ('Iron', '#4C4C4C'), ('Magnesium', '#B3B3B3'),
        ('Tangerine', '#FF8000'), ('Lime', '#80FF00'), ('SeaFoam', '#00FF80'),
        ('Aqua', '#0080FF'), ('Grape', '#8000FF'), ('Strawberry', '#FF0080'),
        ('Tungsten', '#333333'), ('Silver', '#CCCCCC'), ('Salmon', '#FF6666'),
        ('Banana', '#FFFF66'), ('Flora', '#66FF66'), ('Ice', '#66FFFF'),
        ('Orchid', '#6666FF'), ('Bubblegum', '#FF66FF'), ('Lead', '#191919'),
        ('Mercury', '#E6E6E6'), ('Cantaloupe', '#FFCC66'), ('Honeydew', '#CCFF66'),
        ('Spindrift', '#66FFCC'), ('Sky', '#66CCFF'), ('Lavender', '#CC66FF'),
        ('Carnation', '#FF6FCF'), ('Licorice', '#000000'), ('Snow', '#FFFFFF'),
        ('black', 'black'), ('blue', 'blue'), ('brown', 'brown'), ('cyan', 'cyan'),
        ('green', 'green'), ('magenta', 'magenta'), ('orange', 'orange'),
        ('purple', 'purple'), ('red', 'red'), ('yellow', 'yellow'), ('white', 'white')])
    self._color_series = ['#FF0000', '#FFFF00', '#00FFFF', '#80FF00', '#0000FF',
                          '#FF00FF', '#0080FF', '#FF8000', '#00FF80', '#8000FF', '#FF0080']
    self._color_pos = 0
def __getattr__(self, name):
    if name == "non_pk_constraints":
        a = self.i_from.constraints.items()
        b = self.i_target.constraints.items()
        a_od = od((k, v) for k, v in a if v.constraint_type != PK)
        b_od = od((k, v) for k, v in b if v.constraint_type != PK)
        return partial(statements_for_changes, a_od, b_od)
    elif name == "pk_constraints":
        a = self.i_from.constraints.items()
        b = self.i_target.constraints.items()
        a_od = od((k, v) for k, v in a if v.constraint_type == PK)
        b_od = od((k, v) for k, v in b if v.constraint_type == PK)
        return partial(statements_for_changes, a_od, b_od)
    elif name == "selectables":
        return partial(
            get_selectable_changes,
            od(sorted(self.i_from.selectables.items())),
            od(sorted(self.i_target.selectables.items())),
            self.i_from.enums,
            self.i_target.enums,
        )
    elif name == "triggers":
        return partial(
            get_trigger_changes,
            od(sorted(self.i_from.triggers.items())),
            od(sorted(self.i_target.triggers.items())),
            od(sorted(self.i_from.selectables.items())),
            od(sorted(self.i_target.selectables.items())),
            self.i_from.enums,
            self.i_target.enums,
        )
    elif name in THINGS:
        return partial(
            statements_for_changes,
            getattr(self.i_from, name),
            getattr(self.i_target, name),
        )
    else:
        raise AttributeError(name)
def parse_html_template(html):
    '''
    Returns the evaluated html as an ordered dictionary of sections.
    This dictionary follows this format:

    <block_name> - the key to the section defined as the block name
        - <type> - the type of tag it is
        - <html> - a string that preserves the order of the childblocks
          and the innerhtml defined there
        - <child_blocks> (optional) - an ordered dictionary of blocks with
          the key being the name of the block
        - <parent> (optional) - the parent block that this rests in,
          creating a two way link between block and childblock
        - <tag> - the same as the key

    @param html: the template as a string
    '''
    sections = od()
    tag_stack = []
    out = ""
    in_html_tag = False
    i = 0
    while i < len(html):
        c = html[i]
        if is_start_tag(c, i, html):
            i, tag = parse_tag(i + 2, html)  # skip creation modulo
            out = add_tag(sections, tag_stack, tag, out)
        elif c == '%' and not in_html_tag:
            try:
                i, tag = parse_tag(i + 1, html)
            except SyntaxError:
                out += c  # Modulo is allowed
                i += 1
                raise SyntaxError(
                    "Modulo found outside of html tag and without an opening brace at char '{}' for tag '{}'."
                    .format(i, tag))
        elif c == '<':
            in_html_tag = True
            i += 1
            out += c
        elif c == '<' and in_html_tag:
            raise SyntaxError(
                "Character < found inside of an html tag at char '{}'".format(i))
        elif c == '>':
            in_html_tag = False
            i += 1
            out += c
        else:
            out += c
            i += 1
    if tag_stack:
        raise KeyError(
            "The following blocks did not have an end tag defined: '{}'".format(tag_stack))
    if sections["parent"]["type"] == "head":
        sections["foot"] = {"type": "foot", "html": "out"}
    return sections
def load_types(self):
    q = self.c.execute(self.TYPES_QUERY)

    def col(defn):
        return defn["attribute"], defn["type"]

    types = [
        InspectedType(i.name, i.schema, dict(col(_) for _ in i.columns))
        for i in q
    ]  # type: list[InspectedType]
    self.types = od((t.signature, t) for t in types)
def generate_webms(self):
    print('Generating webms')
    for node in self.todo['webm']:
        node = node.split('.')[0]
        infile = os.path.join(ORIG, self.orig[node])
        outfile = os.path.join(COMPUTED, '{}.webm'.format(node))
        ff = ffmpy3.FFmpeg(
            inputs=od([(infile, None)]),
            outputs={outfile: "-an -map 0:v -vf scale=640:360:force_original_aspect_ratio=decrease -b:v 900k -codec:v libvpx -auto-alt-ref 0"})
        ff.run()
    self.todo['webm'] = []
def info(self):
    from collections import OrderedDict as od
    info = od()
    info['id'] = self.id
    info['events'] = self.events
    info['tasks_in'] = self.tasks_in
    info['tasks_out'] = self.tasks_out
    info['pending_clients_demands'] = self.clients_demands_pending_answer
    info['pending_replies'] = self.pending_replies
    return info
def ParseKaryotypes(karyotypes):
    """ """
    karyoreader = reader(open(karyotypes), delimiter="\t")
    karyodict = od()
    for row in karyoreader:
        karyodict[row[1]] = [row[0], row[2], row[3]]
    return karyodict
def __init__(self, filename=None, header=None, terms=None, typedefs=None, instances=None):
    self.filename = filename
    self.Terms = od()
    self.Terms.names = {}
    self.Typedefs = od()
    self.Typedefs.names = {}
    self.Instances = od()
    self.Instances.names = {}
    self.Headers = od()  #LOL STUPID FIXME
    self.Headers.names = {}  # FIXME do not want? what about imports?
    if filename is not None:  # FIXME could spec filename here?
        #od_types = {type_.__name__:type_od for type_,type_od in zip((Term, Typedef, Instance),(self.Terms,self.Typedefs,self.Instances))}  #LOL GETATTR
        with open(filename, 'rt') as f:
            data = f.read()
        #deal with \<newline> escape
        data = data.replace(' \n', '\n')  # FIXME need for arbitrary whitespace
        data = data.replace('\<newline>\n', ' ')
        # TODO remove \n!.+\n
        sections = data.split('\n[')
        header_block = sections[0]
        self.header = Header(header_block, self)
        stanzas = sections[1:]
        for block in stanzas:
            block_type, block = block.split(']\n', 1)
            type_ = stanza_types[block_type]
            #odt = od_type[block_type]
            t = type_(block, self)  # FIXME :/
            self.add_tvpair_store(t)
    elif header is not None:
        self.header = header
        self.Terms = terms  # TODO this should take iters not ods
        self.Typedefs = typedefs
        self.Instances = instances
    elif header is None:
        self.header = None
def __init__(self, data=None, altitude=None, dtime=None, var_name=None,
             data_err=None, var_unit=None, altitude_unit=None, **location_info):
    if data is None:
        data = []
    if data_err is None:
        data_err = []
    if dtime is None:
        dtime = []
    if altitude is None:
        altitude = []
    if var_name is None:
        var_name = 'data'
    self._var_name = None
    self._data = []
    self._data_err = []
    self._altitude = []
    self._vert_coord_name = None
    self._vert_coord_vals = od()
    self.var_info = BrowseDict()
    self.var_info['altitude'] = od()
    self.update(**location_info)
    self.var_name = var_name
    self.dtime = dtime
    self.data = data
    self.data_err = data_err
    self.altitude = altitude
    if var_unit is not None:
        self.var_unit = var_unit
    if altitude_unit is not None:
        self.altitude_unit = altitude_unit
def load_comments(self):
    q = self.c.execute(self.COMMENTS_QUERY)
    comments = [
        InspectedComment(
            schema=c.nspname,
            object_type=c.objtype,
            object_name=c.objname,
            object_subname=c.objsubname,
            comment=c.description,
        )
        for c in q
    ]
    self.comments = od((i.key, i) for i in comments)
def generate_step(workflow_key, resource, state, check_mode, step_name=None):
    if step_name is None:
        step_name = 'Processing resource `%s` for state `%s` with check_mode `%s`' % (workflow_key, state, check_mode)
    step = od([
        ('name', step_name),
        ('include_tasks', od([
            ('file', 'tasks/nitro_resource_task.yaml'),
            ('apply', od([
                ('vars', od([
                    ('resource_name', workflow_key),
                    ('state', state),
                    ('check_mode', check_mode),
                    ('workflow_dict', '{{ workflow.%s }}' % workflow_key),
                    ('resource_attributes', copy.deepcopy(resource)),
                ])),
            ])),
        ])),
    ])
    return step
def create_basic_section_workflows(args, workflows):
    workflows['server'] = od([
        ('lifecycle', 'object'),
        ('endpoint', 'server'),
        ('primary_id_attribute', 'name'),
        ('resource_missing_errorcode', '258'),
        ('allow_recreate', 'true'),
        ('non_updateable_attributes', _get_non_updateable_attributes(args, 'server')),
    ])
    workflows['service'] = od([
        ('lifecycle', 'object'),
        ('endpoint', 'service'),
        ('primary_id_attribute', 'name'),
        ('resource_missing_errorcode', '344'),
        ('allow_recreate', 'true'),
        ('non_updateable_attributes', _get_non_updateable_attributes(args, 'service')),
    ])
    workflows['servicegroup'] = od([
        ('lifecycle', 'object'),
        ('endpoint', 'servicegroup'),
        ('primary_id_attribute', 'servicegroupname'),
        ('resource_missing_errorcode', '258'),
        ('allow_recreate', 'true'),
        ('non_updateable_attributes', _get_non_updateable_attributes(args, 'servicegroup')),
    ])
    workflows['service_lbmonitor_binding'] = od([
        ('lifecycle', 'binding'),
        ('endpoint', 'service_lbmonitor_binding'),
        ('bound_resource_missing_errorcode', '258'),
        ('primary_id_attribute', _get_bindig_id_attributes(args, 'service_lbmonitor_binding')[1]),
        ('delete_id_attributes', _get_bindig_id_attributes(args, 'service_lbmonitor_binding')[0]),
    ])
    workflows['servicegroup_lbmonitor_binding'] = od([
        ('lifecycle', 'binding'),
        ('endpoint', 'servicegroup_lbmonitor_binding'),
        ('bound_resource_missing_errorcode', '351'),
        ('primary_id_attribute', _get_bindig_id_attributes(args, 'servicegroup_lbmonitor_binding')[1]),
        ('delete_id_attributes', _get_bindig_id_attributes(args, 'servicegroup_lbmonitor_binding')[0]),
    ])
def init_bindings(sig: Signature) -> od:
    bids = od([])
    for n, p in sig.parameters.items():
        if p.kind is Parameter.VAR_POSITIONAL:
            bids[n] = tuple()
        elif p.kind is Parameter.VAR_KEYWORD:
            bids[n] = dict()
        else:
            bids[n] = p.default
    return bids
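# Minimal usage sketch for init_bindings() above (not part of the original source);
# the sample function is hypothetical and only illustrates the mapping of
# parameter kinds to their initial binding values.
from inspect import signature, Parameter

def sample(a, b=2, *args, **kwargs):
    pass

bindings = init_bindings(signature(sample))
# bindings -> OrderedDict([('a', Parameter.empty), ('b', 2), ('args', ()), ('kwargs', {})])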
def test_switcher_function(self):
    tests_path = os.path.join('test_data', 'switcher_function_tests.xml')
    for etr in load_xml_tests(tests_path).values():
        tup_arg = ast.literal_eval(etr.findtext('tup_arg'))
        remapped = od(ast.literal_eval(etr.findtext('remapped')))
        deptype = etr.findtext('deptype')
        govs = ast.literal_eval(etr.findtext('govs'))
        head = ast.literal_eval(etr.findtext('head'))
        self.assertEqual(
            hs.alg_controller(tup_arg, remapped, deptype, govs), head)
def info_init(self):
    """Empty dictionary containing init values of infos to be extracted
    from filenames
    """
    return od(year=None,
              var_name=None,
              ts_type=None,
              vert_code='',
              is_at_stations=False,
              data_id='')
def copy_key(redis_client, ledis_client, key, convert=False):
    global entries
    k_type = redis_client.type(key)
    if k_type == "string":
        value = redis_client.get(key)
        ledis_client.set(key, value)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1
    elif k_type == "list":
        _list = redis_client.lrange(key, 0, -1)
        for value in _list:
            ledis_client.rpush(key, value)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1
    elif k_type == "hash":
        mapping = od(redis_client.hgetall(key))
        ledis_client.hmset(key, mapping)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1
    elif k_type == "zset":
        # dangerous to do this?
        out = redis_client.zrange(key, 0, -1, withscores=True)
        pieces = od()
        for i in od(out).iteritems():
            pieces[i[0]] = int(i[1])
        ledis_client.zadd(key, **pieces)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1
    elif k_type == "set":
        mbs = list(redis_client.smembers(key))
        if mbs is not None:
            ledis_client.sadd(key, *mbs)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1
    else:
        print "KEY %s of TYPE %s is not supported by LedisDB." % (key, k_type)
def load_varconfig_ini(fpath):
    cfg = ConfigParser(allow_no_value=True)
    cfg.optionxform = str
    cfg.read(fpath)
    sections = cfg.sections()
    vals_raw = cfg._sections
    result = od()
    for key in sections:
        result[key] = list(vals_raw[key].keys())
    return result
def has_compatible_columns(self, other):
    items = list(self.columns.items())
    if self.relationtype != "f":
        old_arg_count = len(other.columns)
        items = items[:old_arg_count]
    items = od(items)
    return items == other.columns
def mkpspl(self, pltrg, statement):
    ''' Build a common-size (proportionally scaled) profit-and-loss statement from the P/L data '''
    # compute earnings
    if "earnings" not in statement.keys():
        statement["earnings"] = od([("営業利益",
                                     sum(statement["income"].values()) - sum(statement["expenses"].values()))])
    # plot the P/L
    # profitable company
    if sum(statement["earnings"].values()) > 0:
        # earnings
        bottom = self._displayItems(pltrg, statement["earnings"], 1, 0, "#DCEDC8")
        # expenses
        self._displayItems(pltrg, statement["expenses"], 1, bottom, "#FFF9C4")
        # income
        self._displayItems(pltrg, statement["income"], 2, 0, "#FFE0B2")
    # loss-making company
    else:
        # expenses
        self._displayItems(pltrg, statement["expenses"], 1, 0, "#FFF9C4")
        # earnings
        bottom = self._displayItems(pltrg, statement["earnings"], 2, 0, "#DCEDC8")
        # income
        self._displayItems(pltrg, statement["income"], 2, bottom, "#FFE0B2")
    # title
    if self.pltitle:
        pltrg.set_title(self.pltitle)
    # x axis
    pltrg.tick_params(labelbottom="off", bottom="off")
    # y axis
    if self.noylab:
        pltrg.tick_params(labelleft="off", left="off")
    # ylim
    if self.basis:
        pltrg.set_ylim((0, self.basis))
def dis_to_known_site_plot(in_isoform_res, max_dis=30, out_plot='dis_to_known.png'):
    ut.err_format_time('dis_to_known_site_plot',
                       'Plotting dis_known_site_plot for {} ... '.format(in_isoform_res))
    internal_dis_dict = dd(lambda: 0)
    back_dis_dict = dd(lambda: 0)
    dis_list = [i for i in range(-max_dis, max_dis + 1)]
    xlabel = 'Distance'
    ylabel = 'log10(Read count + 1)'
    with open(in_isoform_res) as in_fp:
        for line in in_fp:
            if line.startswith('#'):
                continue
            ele = line.rsplit()
            get_dis_to_known(ele, internal_dis_dict, back_dis_dict)
    for i in dis_list:
        internal_dis_dict[i] += 0
        back_dis_dict[i] += 0
    inter_od_dict = od([(i, math.log10(internal_dis_dict[i] + 1)) for i in dis_list])
    back_od_dict = od([(i, math.log10(back_dis_dict[i] + 1)) for i in dis_list])
    dis_dict = {
        'Internal splice-site': list(inter_od_dict.values()),
        'Back-splice-site': list(back_od_dict.values())
    }
    # print dis_dict
    dkp.dis_to_known_plot(out_fig=out_plot,
                          in_dict=dis_dict,
                          subgroup=[1, 1],
                          title='Distance to known splice-site',
                          xticks=dis_list,
                          xlabel=xlabel,
                          ylabel=ylabel)
    ut.err_format_time('dis_to_known_site_plot',
                       'Plotting dis_known_site_plot for {} done!'.format(in_isoform_res))
    return
def __init__(self, **kwargs):
    self._head_fix = od(num_head_lines=np.nan,
                        num_head_fmt=np.nan,
                        data_originator="",
                        sponsor_organisation="",
                        submitter="",
                        project_association="",
                        vol_num=np.nan,
                        vol_totnum=np.nan,
                        ref_date=np.nan,
                        revision_date=np.nan,
                        freq=np.nan,
                        descr_time_unit="",
                        num_cols_dependent=np.nan,
                        mul_factors=[],
                        vals_invalid=[],
                        descr_first_col="")
    self._var_defs = []
    self._meta = od()
    self.update(**kwargs)
def load_xml_tests(test_name):
    '''This function reads test_name xml file and returns an ordered dict
    of parsed etree objects'''
    with open(test_name, 'r') as f:
        fdata = f.read()
    etrees = od()
    for sent in enumerate(fdata.split('\n\n')):
        if not sent[1]:
            continue
        etrees[sent[0]] = et.fromstring(sent[1])
    return etrees
def __init__(self, forms_dict=None):
    """Class initialisation."""
    if forms_dict is None:
        forms_dict = {}
    self._forms = od()
    self.id_count = 0
    self.type = ""
    for key, val in six.iteritems(forms_dict):
        self[key] = val
def zero(self, rail_h, cb_menu_asks_display_to_clear, cb_menu_asks_display_to_write,
         cb_menu_selection, height, width, title):
    self.rail_h = rail_h
    self.cb_menu_asks_display_to_clear = cb_menu_asks_display_to_clear
    self.cb_menu_asks_display_to_write = cb_menu_asks_display_to_write
    self.height = height
    self.width = width
    self.title = title
    #
    self.d_menu = od()
def getHashSums(file_path):
    hashSums = od()
    hashSums['md5sum'] = hashlib.md5()
    hashSums['sha1sum'] = hashlib.sha1()
    hashSums['sha224sum'] = hashlib.sha224()
    hashSums['sha256sum'] = hashlib.sha256()
    hashSums['sha384sum'] = hashlib.sha384()
    hashSums['sha512sum'] = hashlib.sha512()
    with open(file_path, 'rb') as fd:
        dataChunk = fd.read(1024)  # reading only 1 KB at a time
        while dataChunk:
            for hashsum in hashSums.keys():
                hashSums[hashsum].update(dataChunk)
            dataChunk = fd.read(1024)
    results = od()
    for key, value in hashSums.items():
        results[key] = value.hexdigest()
    return results
def apply_changes_rename(self):
    df = self.df_edit
    mapping = od()
    for i, name in enumerate(self.run_names):
        repl = str(self.input_fields_rename[i].value)
        mapping[name] = repl
    self.df_edit = df.rename(index=mapping, level=self.run_level_idx)
    self.output.append_display_data(
        "Applying renaming: {}".format(mapping))
def apply_changes(self):
    df = self.df
    mapping = od()
    for i, name in enumerate(self.names):
        repl = str(self.input_fields[i].value)
        mapping[name] = repl
    self._df_edit = df.rename(index=mapping, level=self.level)
    self.disp_current()
def _fake_import_specs():
    """Returns dictionary for adding a new fake import type"""
    return od([("type", "fake"),
               ("access_type", "col_index"),
               ("file_type", "csv"),
               ("time_str_formats", "%Y%m%d%H%M"),
               ("delim", ";"),
               ("start", 0),       #col num
               ("stop", 1),        #col num
               ("bla", "Blub"),    #invalid (for test purpose)
               ("num_scans", 4)])  #colnum
def compute(self, recs, truth, *, progress=lambda x: x):
    """
    Run the analysis.  Neither data frame should be meaningfully indexed.

    Args:
        recs(pandas.DataFrame): A data frame of recommendations.
        truth(pandas.DataFrame): A data frame of ground truth (test) data.

    Returns:
        pandas.DataFrame: The results of the analysis.
    """
    _log.info('analyzing %d recommendations (%d truth rows)', len(recs), len(truth))
    gcols = self.group_cols
    if gcols is None:
        gcols = [c for c in recs.columns if c not in self.DEFAULT_SKIP_COLS]
    _log.info('using group columns %s', gcols)
    _log.info('ungrouped columns: %s', [c for c in recs.columns if c not in gcols])
    gc_map = dict((c, i) for (i, c) in enumerate(gcols))

    ti_cols = [c for c in gcols if c in truth.columns]
    ti_cols.append('item')
    _log.info('using truth ID columns %s', ti_cols)
    truth = truth.set_index(ti_cols)
    if not truth.index.is_unique:
        warnings.warn('truth frame does not have unique values')
    truth.sort_index(inplace=True)

    _log.info('preparing analysis result storage')
    # we manually use grouping internals
    grouped = recs.groupby(gcols)
    res = pd.DataFrame(od((k, np.nan) for (f, k, args) in self.metrics),
                       index=grouped.grouper.result_index)
    assert len(res) == len(grouped.groups), \
        "result set size {} != group count {}".format(len(res), len(grouped.groups))
    assert res.index.nlevels == len(gcols)

    _log.info('computing analysis for %d lists', len(res))
    for i, row_key in enumerate(progress(res.index)):
        g_rows = grouped.indices[row_key]
        g_recs = recs.iloc[g_rows, :]
        if len(ti_cols) == len(gcols) + 1:
            tr_key = row_key
        else:
            tr_key = tuple([row_key[gc_map[c]] for c in ti_cols[:-1]])
        g_truth = truth.loc[tr_key, :]
        for j, (mf, mn, margs) in enumerate(self.metrics):
            res.iloc[i, j] = mf(g_recs, g_truth, **margs)

    return res