Example #1
 def __init__(
     self,
     name,
     schema,
     columns,
     inputs=None,
     definition=None,
     dependent_on=None,
     dependents=None,
     comment=None,
     relationtype="unknown",
     parent_table=None,
     partition_def=None,
     rowsecurity=False,
     forcerowsecurity=False,
 ):
     self.name = name
     self.schema = schema
     self.inputs = inputs or []
     self.columns = columns
     self.definition = definition
     self.relationtype = relationtype
     self.dependent_on = dependent_on or []
     self.dependents = dependents or []
     self.dependent_on_all = []
     self.dependents_all = []
     self.constraints = od()
     self.indexes = od()
     self.comment = comment
     self.parent_table = parent_table
     self.partition_def = partition_def
     self.rowsecurity = rowsecurity
     self.forcerowsecurity = forcerowsecurity
def combine_basic_match_topic_sensitive_page_rank(basic_best_docs,  ts_page_ranks, output, i):
    print >> output, "///////////////////////// Combining Basic Best Match and Topic Sensitive Page Rank /////////////////////////"
    print >> output, "///////////////////////// Iterazione: ",i," /////////////////////////"

    start_time = timeit.default_timer()

    temp1 =  sorted(ts_page_ranks, key=lambda x: ts_page_ranks[x], reverse=True)
    ts_pr = od((x, ts_page_ranks[x]) for x in temp1)

    tmp = dict()
    for doc in basic_best_docs:
        for topic in ts_pr.keys():
            for doc2 in ts_pr[topic]:
                if doc == doc2:
                    if doc not in tmp:
                        tmp[doc] = 0
                    tmp[doc] = ts_pr[topic][doc]

    temp =  sorted(tmp, key=lambda x: tmp[x], reverse=True)
    sorted_docs = od((x, tmp[x]) for x in temp)
    
    
    combine_elapsed = timeit.default_timer() - start_time
    print >> output, "Elapsed time: ", str(combine_elapsed)

    print >> output, "Top 20 documents with their topic-sensitive page rank: "
    print >> output, sorted_docs
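
The argument shapes are not documented in this example; a plausible reading, inferred from the lookups above (an assumption, not confirmed by the source):

# basic_best_docs : {doc_id: match_score}              -- result of a basic best-match pass
# ts_page_ranks   : {topic: {doc_id: pagerank_score}}  -- one ranking per topic
# output          : an open, writable file object (target of the Python 2 `print >>` statements)
# i               : the iteration number, used only for logging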
Example #3
def copy_key(redis_client, ledis_client, key, convert=False):
    global entries
    k_type = redis_client.type(key)
    if k_type == "string":
        value = redis_client.get(key)
        ledis_client.set(key, value)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1

    elif k_type == "list":
        _list = redis_client.lrange(key, 0, -1)
        for value in _list:
            ledis_client.rpush(key, value)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1

    elif k_type == "hash":
        mapping = od(redis_client.hgetall(key))
        ledis_client.hmset(key, mapping)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1

    elif k_type == "zset":
        out = redis_client.zrange(key, 0, -1, withscores=True)
        pieces = od()
        for i in od(out).iteritems():
            pieces[i[0]] = int(i[1])
        ledis_client.zadd(key, **pieces)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1

    else:
        print "KEY %s of TYPE %s is not supported by LedisDB." % (key, k_type)
Example #4
    def test_etree(self):
        'Parker conversion from data to etree'
        eq = self.check_etree(xmljson.parker)

        # From https://developer.mozilla.org/en-US/docs/JXON#In_summary
        eq({'animal': {}}, '<animal/>')
        eq({'animal': 'Deka'}, '<animal>Deka</animal>')
        eq({'animal': 1}, '<animal>1</animal>')
        eq({'animal': od([('dog', 'Charlie'), ('cat', 'Deka')])},
           '<animal><dog>Charlie</dog><cat>Deka</cat></animal>')
        eq({'animal': {'dog': ['Charlie', 'Mad Max']}},
           '<animal><dog>Charlie</dog><dog>Mad Max</dog></animal>')

        # Test edge cases
        eq('x', '<x/>')             # Strings become elements
        eq({})                      # Empty objects become empty nodes
        eq(od([                     # Multiple keys become multiple nodes
            ('x', 'a'),
            ('y', 'b')
        ]), '<x>a</x>', '<y>b</y>')
        with self.assertRaises(Exception):
            eq({'x': {'@x': 1}}, '<x x="1"/>')

        # Nested elements
        eq({'alice': od([
            ('bob', {'charlie': {}}),
            ('david', {'edgar': {}})])},
           '<alice><bob><charlie/></bob><david><edgar/></david></alice>')

        # Multiple elements at the same level become array elements.
        eq({'alice': {'bob': [{'charlie': {}}, {'david': {}}]}},
           '<alice><bob><charlie/></bob><bob><david/></bob></alice>')
def create_epdict(ep_dict, url):
    try:
        scraper = cfscrape.create_scraper()
        content =  scraper.get(url).content
        soup = bs(content)
        epdict = {}
        eplist = []
        titlelist = []
        if "Drama" in url:
            for link in soup.find_all('a'):
                    if 'href' in str(link):
                        try:
                            if "Episode-" in link['href']:
                                possible = link['href'].split("Episode-")[1]
                                possible = "/Episode-" + possible
                                fullurl = url + possible
                                fulltitle = link['title'].split("Episode ")[1][:9]
                                integers = [str(i) for i in range(0,10)]
                                if possible:
                                    episode = possible[:7]
                                if fulltitle[0] in integers and fulltitle[1] in integers and fulltitle[2] in integers:
                                    title = fulltitle[:3]
                                elif fulltitle[0] in integers and fulltitle[1] in integers:
                                    title = fulltitle[:2]
                                else:
                                    title = fulltitle[0]
                                eplist.append(fullurl.encode('ascii'))
                                titlelist.append(title.encode('ascii'))

                        except TypeError:
                            pass
            epdict = od((zip(titlelist[::-1][0:], eplist[::-1][0:])))
            return epdict
        else:
            for link in soup.find_all('a'):
                    try:
                        if "Episode-" in link['href']:
                            possible = link['href'].split("Episode-")[1]
                            possible = "/Episode-" + possible
                            fullurl = url + possible
                            fulltitle = link['title'].split("Episode ")[1][:9]
                            integers = [str(i) for i in range(0,10)]
                            if fulltitle[4] in integers and fulltitle[5] in integers and fulltitle[6] in integers:
                                title = fulltitle[:7]
                            elif fulltitle[3:6] == " - " and fulltitle[6::9] in integers and fulltitle[7::9] in integers and fulltitle[8::9] in integers:
                                title = fulltitle[:9]
                            elif 'v' in fulltitle[:4].lower():
                                title = fulltitle[0:5]
                            else:
                                title = fulltitle[:3]
                            eplist.append(fullurl.encode('ascii'))
                            titlelist.append(title.encode('ascii'))
                    except TypeError:
                        pass
            epdict = od((zip(sorted(titlelist),sorted(eplist))))
            return epdict
    except Exception:
        # swallow scraping/parsing errors and fall through, returning None
        pass
def make_ped_file(snp_list, annot_file, fam_file, conf_file, out_pre):
    """
    The PED file is a white-space (space or tab) delimited file.
    the first six columns are mandatory:
         Family ID
         Individual ID
         Paternal ID
         Maternal ID
         Sex (1=male; 2=female; other=unknown)
         Phenotype
    Genotypes (column 7 onwards) should also be white-space delimited;
    they can be any character (e.g. 1,2,3,4 or A,C,G,T or anything else)
    except 0 which is, by default, the missing genotype character.
    All markers should be biallelic.

    """

    good_snps = set([x.rstrip() for x in snp_list])
    annots = od()
    annot_reader = csv.reader(annot_file, delimiter=",", quotechar='"')
    for row in annot_reader:
        if row[0].startswith('AX-') and row[0] in good_snps:
            # Probe Set ID: [Chromosome,Physical Position,Allele A,Allele B]
            kept_annots = [row[x] for x in [3, 4, 9, 10]]
            kept_annots[0] = kept_annots[0].split('.')[1]
            annots[row[0]] = kept_annots

    fam_dict = od()
    for l in fam_file:
        tmp = l.rstrip().split()
        fam_dict[tmp[1]] = tmp

    kept_samples = []
    sample_names = []
    gts = {}
    gt_reader = csv.reader(conf_file, delimiter="\t")
    for row in gt_reader:
        if not row[0].startswith('#'):
            if row[0] == "Probe Set ID" or row[0] == 'probeset_id':
                for i, ind_tmp in enumerate(row[1:]):
                    ind = re.sub(r'(\.AxiomGT1\.chp Call Codes)|(\.CEL)', '', ind_tmp)
                    if ind in fam_dict:
                        kept_samples.append(i+1)
                        sample_names.append(ind)
            else:
                if row[0] in annots:
                    gts[row[0]] = [row[x] for x in kept_samples]

    with open('{}.conf'.format(out_pre), 'w') as ped_file:

        for snp_id in annots:
            # transpose genotype matrix
            for ind, gt in zip(sample_names, gts[snp_id]):
                fam_dict[ind].append(gt)

        for ind in fam_dict:
            ped_file.write('\t'.join(fam_dict[ind]) + '\n')
Example #7
def differences(a, b, add_dependencies_for_modifications=True):
    a_keys = set(a.keys())
    b_keys = set(b.keys())
    keys_added = set(b_keys) - set(a_keys)
    keys_removed = set(a_keys) - set(b_keys)
    keys_common = set(a_keys) & set(b_keys)
    added = od((k, b[k]) for k in sorted(keys_added))
    removed = od((k, a[k]) for k in sorted(keys_removed))
    modified = od((k, b[k]) for k in sorted(keys_common) if a[k] != b[k])
    unmodified = od((k, b[k]) for k in sorted(keys_common) if a[k] == b[k])
    return added, removed, modified, unmodified
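
A minimal usage sketch for differences() with hypothetical dicts, assuming od is collections.OrderedDict:

from collections import OrderedDict as od

old = {"a": 1, "b": 2, "c": 3}
new = {"b": 2, "c": 4, "d": 5}
added, removed, modified, unmodified = differences(old, new)
# added      -> OrderedDict([('d', 5)])
# removed    -> OrderedDict([('a', 1)])
# modified   -> OrderedDict([('c', 4)])
# unmodified -> OrderedDict([('b', 2)])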
Example #8
    def test_etree(self, converter=None):
        'BadgerFish conversion from data to etree'
        eq = self.check_etree(converter or xmljson.badgerfish)

        # From https://developer.mozilla.org/en-US/docs/JXON#In_summary
        eq({'animal': {}}, '<animal/>')
        eq({'animal': 'Deka'}, '<animal>Deka</animal>')
        eq({'animal': 1}, '<animal>1</animal>')
        eq({'animal': {'@name': 1}}, '<animal name="1"/>')
        eq({'animal': {'@name': 'Deka', '$': 'is my cat'}},
           '<animal name="Deka">is my cat</animal>')
        eq({'animal': od([('dog', 'Charlie'), ('cat', 'Deka')])},
           '<animal><dog>Charlie</dog><cat>Deka</cat></animal>')
        eq({'animal': {'dog': ['Charlie', 'Mad Max']}},
           '<animal><dog>Charlie</dog><dog>Mad Max</dog></animal>')
        eq({'animal': {'$': ' in my house ', 'dog': 'Charlie'}},
           '<animal> in my house <dog>Charlie</dog></animal>')

        # TODO: handling split text
        # eq({'animal': {'$': ' in my house', 'dog': 'Charlie'}},
        #    '<animal> in my <dog>Charlie</dog> house</animal>')

        # Test edge cases
        eq('x', '<x/>')             # Strings become elements
        eq({})                      # Empty objects become empty nodes
        eq(od([                     # Multiple keys become multiple nodes
            ('x', {'@x': 1}),
            ('y', 'z')
        ]), '<x x="1"/>', '<y>z</y>')

        # Attributes
        eq({'p': {'@id': 1, '$': 'text'}}, '<p id="1">text</p>')
        eq({'div': {'@id': 2, '$': 'parent-text', 'p': {'$': 'text'}}},
            '<div id="2">parent-text<p>text</p></div>')

        # From http://www.sklar.com/badgerfish/
        # Text content of elements goes in the $ property of an object.
        eq({'alice': {'$': 'bob'}}, '<alice>bob</alice>')

        # Nested elements become nested properties
        eq({'alice': od([
            ('bob', {'$': 'charlie'}),
            ('david', {'$': 'edgar'})])},
           '<alice><bob>charlie</bob><david>edgar</david></alice>')

        # Multiple elements at the same level become array elements.
        eq({'alice': {'bob': [{'$': 'charlie'}]}},
           '<alice><bob>charlie</bob></alice>')
        eq({'alice': {'bob': [{'$': 'charlie'}, {'$': 'david'}]}},
           '<alice><bob>charlie</bob><bob>david</bob></alice>')

        # Attributes go in properties whose names begin with @.
        eq({'alice': {'$': 'bob', '@charlie': 'david'}},
            '<alice charlie="david">bob</alice>')
Example #9
    def test_etree(self):
        'GData conversion from etree to data'
        eq = self.check_etree(xmljson.gdata)

        # From https://developer.mozilla.org/en-US/docs/JXON#In_summary
        eq({'animal': {}}, '<animal/>')
        eq({'animal': 'Deka'}, '<animal>Deka</animal>')
        eq({'animal': 1}, '<animal>1</animal>')
        eq({'animal': {'name': 1}}, '<animal name="1"/>')
        eq({'animal': {'$t': 'is my cat'}},
           '<animal>is my cat</animal>')
        eq({'animal': od([('dog', {'$t': 'Charlie'}), ('cat', {'$t': 'Deka'})])},
           '<animal><dog>Charlie</dog><cat>Deka</cat></animal>')
        eq({'animal': od([('dog', 'Charlie'), ('cat', 'Deka')])},
           '<animal dog="Charlie" cat="Deka"/>')
        eq({'animal': {'dog': ['Charlie', 'Mad Max']}},
           '<animal><dog>Charlie</dog><dog>Mad Max</dog></animal>')
        eq({'animal': {'$t': ' in my house ', 'dog': {'$t': 'Charlie'}}},
           '<animal> in my house <dog>Charlie</dog></animal>')
        eq({'animal': {'$t': ' in my house ', 'dog': 'Charlie'}},
           '<animal dog="Charlie"> in my house </animal>')

        # Test edge cases
        eq('x', '<x/>')             # Strings become elements
        eq({})                      # Empty objects become empty nodes
        eq(od([                     # Multiple keys become multiple nodes
            ('x', {}),
            ('y', 'z')
        ]), '<x/>', '<y>z</y>')

        # Attributes
        eq({'p': {'$t': 'text'}}, '<p>text</p>')
        eq({'div': {'$t': 'parent-text', 'p': {'$t': 'text'}}},
            '<div>parent-text<p>text</p></div>')

        # Text content of elements goes in the $ property of an object.
        eq({'alice': {'$t': 'bob'}}, '<alice>bob</alice>')

        # Nested elements become nested properties
        eq({'alice': od([
            ('bob', {'$t': 'charlie'}),
            ('david', {'$t': 'edgar'})])},
           '<alice><bob>charlie</bob><david>edgar</david></alice>')

        # Multiple elements at the same level become array elements.
        eq({'alice': {'bob': [{'$t': 'charlie'}]}},
           '<alice><bob>charlie</bob></alice>')
        eq({'alice': {'bob': [{'$t': 'charlie'}, {'$t': 'david'}]}},
           '<alice><bob>charlie</bob><bob>david</bob></alice>')

        # Attributes go in properties whose names begin with @.
        eq({'alice': {'$t': 'bob'}},
            '<alice>bob</alice>')
Example #10
	def planes_of_expression(self):
		if self._eplane is None:
			c1=Counter([planes_of_expression[c][0] for c in self.full_name if onlyltrs(c)])
			c2=Counter([planes_of_expression[c][1] for c in self.full_name if onlyltrs(c)])
			self._eplane3=od([("physical",0),("emotional",0),("mental",0),("intuitive",0)])
			self._eplane4=od([("creative",0),("vacillating",0),("grounded",0)])
			for c in filter(onlyltrs,self.full_name): 
				k,k2=planes_of_expression[c]
				self._eplane3[k]=sum_digits(self.l2nmap[c]+self._eplane3[k])
				self._eplane4[k2]=sum_digits(self.l2nmap[c]+self._eplane4[k2])
			self._eplane,self._eplane2=c1.most_common()[0][0],c2.most_common()[0][0]
		return self._eplane,self._eplane2,self._eplane3,self._eplane4
Example #11
def plot_tags(tags_dic, save_fname):
    """
    Create and save plots for 'Graphs' option.
    These plot files shall be grabbed and included into UI.

    Args:
        | *tags_dic* (dict) -- dictionary of POS-tag occurrences
        | *save_fname* (str) -- currently processed file name without extension

    Returns:
        *odd* (OrderedDict) -- frequency sorted POS-tags

    """
    matplotlib.rc('font', **{'size': 13})
    # create POS-tags distribution plot
    odd = od(sorted([(k, v) for k, v in tags_dic.items()], key=lambda x: x[1]))
    bars = plt.barh(range(len(odd)), odd.values(), align='center')
    plt.title('Part-of-speech tags statistics')
    plt.yticks(range(len(odd)), odd.keys())
    plt.xlabel('Occurrence')
    plt.ylabel('POS-tags')
    plt.grid(True)
    plt.margins(y=0)
    random.shuffle(COLLECTION)
    for i in range(len(tags_dic)):
        bars[i].set_color(COLLECTION[i])
    plt.savefig(os.path.join('_graphs', save_fname + '.png'))
    # create functional / non-functional words pie chart
    plt.clf()
    matplotlib.rc('font', **{'size': 16})
    functional = ('DT', 'PDT', 'PRP', 'PRP$', 'IN', 'CC', 'UH', 'RP', 'WRB',
                  'WP$', 'WDT', 'WP', 'EX', 'MD', 'TO')
    content = ('JJ', 'JJR', 'JJS', 'NN', 'NNS', 'NNP', 'NNPS', 'RB', 'RBR',
               'RBS', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ')
    fwords = sum([tags_dic[k] for k in tags_dic if k in functional])
    cwords = sum([tags_dic[k] for k in tags_dic if k in content])
    try:
        fratio = round(fwords / (fwords + cwords) * 100, 1)
        cratio = round(cwords / (fwords + cwords) * 100, 1)
    except ZeroDivisionError:
        fratio = 0.0
        cratio = 0.0
    labels = ['functional', 'content']
    sizes = [fratio, cratio]
    pie_colors = ['salmon', 'royalblue']
    plt.pie(sizes, labels=labels, colors=pie_colors, autopct='%1.1f%%',
            shadow=True, startangle=90)
    # Set aspect ratio to be equal so that pie is drawn as a circle.
    plt.axis('equal')
    # increasing fonts in a pie chart
    plt.savefig(os.path.join('_graphs', save_fname + '_pie.png'))
    plt.clf()
    return od(reversed(list(odd.items())))
def main(args):
    paralogs = ["AB", "A", "B", "C", "D", "N"]
    start = 120535758
    stop = 120598753

    args = parse_args(args)
    wl = parse_whitelist(args.whitelist)

    outf = open(args.tsv, "w")

    samples = sorted(os.listdir(args.folder))
    sample_paths = [os.path.join(args.folder, x) for x in samples]
    names = [os.path.basename(x) for x in samples]

    final = od()

    for sample, n in izip(sample_paths, names):
        combined = od()
        vcfs = [os.path.join(sample, x) for x in ["AB.vcf", "A.vcf", "B.vcf", "C.vcf", "D.vcf", "N.vcf"]]
        for v, p in izip(vcfs, paralogs):
            combined[p] = make_R(v, wl[p], start, stop)
            final[n] = combined

    for sample, paralog_dict in final.iteritems():
        for paralog, csv in paralog_dict.iteritems():
            outf.write("\t".join(map(str, [sample + "_" + paralog] + csv)))
            outf.write("\n")

    outf.close()

    call = ["Rscript", "/cluster/home/ifiddes/code/n2nl_barplots.R", args.tsv, args.graph + "_full.png"]
    p1 = subprocess.Popen(call)

    outf = open(args.summary, "w")
    rearranged=od()

    for sample, paralog_dict in final.iteritems():
        for paralog, csv in paralog_dict.iteritems():
            if paralog not in rearranged:
                rearranged[paralog] = list()
            rearranged[paralog].append(csv)

    for paralog, csvs in rearranged.iteritems():
        outf.write("\t".join(map(str, ["median", paralog] + median(csvs)))); outf.write("\n")
        outf.write("\t".join(map(str, ["mean", paralog] + mean(csvs)))); outf.write("\n")
        outf.write("\t".join(map(str, ["var", paralog] + var(csvs)))); outf.write("\n")

    outf.close()

    call = ["Rscript", "/cluster/home/ifiddes/code/n2nl_summary_barplots.R", args.summary, args.graph + "_summary.png"]
    p2 = subprocess.Popen(call)
def opt_best_match(inverted_db, query, threshold):
   
    sorted_db = dict()
    query_db = dict()    

    for q in query:
        for term in q.split():
            if term in inverted_db:
                if term not in query_db:
                    query_db[term]= dict() 
                query_db[term]=inverted_db[term]


    temp =  sorted(query_db, key=lambda x:len(query_db[x]), reverse=True)
    sorted_db = od((x, query_db[x]) for x in temp)

    K = countK()
    best_docs = dict()
    best_docs2 = dict()
    count=0
    
    for parola in sorted_db:
        for doc in sorted_db[parola]:
            if count<K:
                if doc not in best_docs:
                    best_docs[doc]=0
                    count+=1
                best_docs[doc]+=sorted_db[parola][doc]
            else:
                if doc in best_docs:
                    best_docs[doc]+=sorted_db[parola][doc]

    #sorting
    temp =  sorted(best_docs, key=lambda x: best_docs[x], reverse=True)
    sorted_docs = od((x, best_docs[x]) for x in temp)

    count =0
    for doc in sorted_docs:
        if count < 20:
            count+=1
            if doc not in best_docs2:
                best_docs2[doc]=0
            best_docs2[doc]=sorted_docs[doc]

    temp2 = sorted(best_docs2, key=lambda x: best_docs2[x], reverse=True)
    best_docs = od((x, best_docs[x]) for x in temp2)

    return best_docs
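
The data layout expected by opt_best_match() is not stated in the example; inferred from the code above (an assumption):

# inverted_db : {term: {doc_id: weight}}  -- inverted index with per-document term weights
# query       : iterable of query strings; each string is split into terms
# threshold   : accepted but unused in this snippet
# countK()    : external helper that caps how many distinct documents are accumulated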
def sort_tags(source):
    """
    return a sorted version of source, with the biggest tags first
    """
    output = od(sorted(Counter([e for e in source]).items(),
                key=lambda t: -t[1]))
    return output
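
A quick usage sketch, assuming od is collections.OrderedDict and Counter comes from collections:

tags = ["noun", "verb", "noun", "adj", "noun", "verb"]
print(sort_tags(tags))
# OrderedDict([('noun', 3), ('verb', 2), ('adj', 1)])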
Example #15
    def fill_mcmc_parameters(self):
        """
        Initializes the ordered dictionary :attr:`mcmc_parameters` from
        the input parameter file.

        It uses :meth:`read_file`, and initializes instances of
        :class:`parameter` to actually fill in :attr:`mcmc_parameters`.

        """

        # Define temporary quantities, only to simplify the input in the
        # parameter file
        self.parameters = od()

        # Read from the parameter file everything
        try:
            self.param_file = open(self.param, 'r')
        except IOError:
            raise io_mp.ConfigurationError(
                "Error in initializing the Data class, the parameter file " +
                "{0} does not point to a proper file".format(self.param))
        # In case the parameter file is a log.param, first scan the file once
        # to extract the path dictionary.
        if self.param.find('log.param') != -1:
            self.read_file(self.param_file, search_path=True)
        self.read_file(self.param_file)

        for key, value in self.parameters.iteritems():
            self.mcmc_parameters[key] = Parameter(value, key)
        """
Example #16
    def test_query(self):
        from collections import OrderedDict as od

        cases = [
            ('', ''),
            ('', []),
            ('', {}),
            ('?name', 'name'),
            ('?name', [('name', None)]),
            ('?name', {'name': None}),
            ('?name=foo', 'name=foo'),
            ('?name=foo', [('name', 'foo')]),
            ('?name=foo', {'name': 'foo'}),
            ('?name=foo', {'name': ['foo']}),
            ('?name=42', [('name', 42)]),
            ('?name=42', {'name': 42}),
            ('?name=42', {'name': [42]}),
            ('?name=foo&type=bar', [('name', 'foo'), ('type', 'bar')]),
            ('?name=foo&type=bar', od([('name', 'foo'), ('type', 'bar')])),
            ('?name=foo&name=bar', [('name', 'foo'), ('name', 'bar')]),
            ('?name=foo&name=bar', {'name': ['foo', 'bar']}),
            ('?name=a%2Fb%2Fc', dict(name='a/b/c')),
            ('?name=a%3Ab%3Ac', dict(name='a:b:c')),
            ('?name=a%3Fb%3Fc', dict(name='a?b?c')),
            ('?name=a%40b%40c', dict(name='a@b@c')),
            ('?name=a%23b%23c', dict(name='a#b#c')),
            ('?name=a%26b%26c', dict(name='a&b&c')),
            ('?name=a%3Bb%3Bc', dict(name='a;b;c')),
        ]
        for uri, query in cases:
            self.check(uri, query=query)
        # invalid query type
        for query in (0, [1]):
            with self.assertRaises(TypeError, msg='query=%r' % query):
                uricompose(query=query)
Example #17
def templateInSubpage(sourcePrefix, destPrefix):
	generator = pagegenerators.PrefixingPageGenerator(prefix=sourcePrefix)
	for page in generator:
		if page.isRedirectPage():
			continue

		suffix = page.title().split('/')[1]
		dest = pywikibot.Page(pywikibot.getSite(), title=destPrefix + suffix)
		print dest
		if dest.exists():
			pywikibot.output(u"Page %s already exists" % dest.title())
			#page.put(u"#redirect [[%s]]" % dest.title(), comment="Redirectez către noua locatie a graficelor cu date demografice")
			continue

		try:
			text = page.get()
		except:
			pywikibot.output(u"Could not read %s" % page.title())
			continue

		output = od({})
		tpl = sf.tl2Dict(sf.extractTemplate(text, u"Demografia"))[0]
		for i in range(1,20):
			p1 = u"a" + str(i)
			p2 = u"p" + str(i)
			if p1 in tpl and p2 in tpl:
				output[i] = {"year": tpl[p1], "pop": tpl[p2]}


		text = u"an,populatie\n"
		for elem in output:
			text += "%s,%s\n" % (output[elem]["year"], output[elem]["pop"])
		print text
		dest.put(text,comment="Creez o nouă pagină cu date demografice")
def __parse_log(ins, pat_time):
    result = od()
    suite = None
    while True:
        try:
            line = ins.next()
            m = _pat_suite.match(line)
            if m:
                suite = m.groups()[0]
                result[suite] = {}
                #print('suite:', suite)
                continue
            m = _pat_case.match(line)
            if m:
                case = m.groups()[0]
                #print('case:', case, end='\t')
                while True:
                    line = ins.next()
                    m = _pat_sep.match(line)
                    if m: break
                    m = pat_time.match(line)
                    if m:
                        result[suite][case] = float(m.groups()[0])
                        #print('time:', m.groups()[0])
                        break
        except StopIteration:
            break
    return result
def combine_basic_match_page_rank(basic_best_docs, page_ranks, output, i):
    print >> output, "///////////////////////// Combining Basic Best Match and Page Rank /////////////////////////"
    print >> output, "///////////////////////// Iterazione: ",i," /////////////////////////"

    start_time = timeit.default_timer()
    
    tmp = dict()
    for doc in basic_best_docs:
        for doc2 in page_ranks:
            if doc == doc2:
                if doc not in tmp:
                    tmp[doc] = 0
                tmp[doc] = page_ranks[doc]

    temp = sorted(tmp, key=lambda x: tmp[x], reverse=True)
    sorted_docs = od((x, tmp[x]) for x in temp)
    
    combine_basic_match_page_rank_elapsed = timeit.default_timer() - start_time

    print >> output, "Tempo impiegato: ", str(combine_basic_match_page_rank_elapsed)

    print >> output, "Migliori 20 documenti con relativo page rank: "
    for doc in sorted_docs:
        print >> output, doc, str(sorted_docs[doc])
Example #20
def process_text(*args):
    """
    Process loaded text with textblob toolkit.
    Calculate text statistics.

    Args:
        *args* (list) -- PriorityQueue and raw text data

    Returns:
        | *parsed_text* (Blobber) -- Blobber obj which contains parse results
        | *full_tagged_sents* (dict) -- dict of
          *{send num: {word num: (word, POS-tag)}}*

    """
    # TextBlob runs POS-tagging
    model_queue, text = args
    parsed_text = TextBlob(text)
    # POS-tagging with nltk again because TextBlob sent.tags is too slow
    tagger = PerceptronTagger()
    tagset = None
    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
    sents_tokenized = sent_detector.tokenize(text)
    tokenized = []
    for sent in sents_tokenized:
        tokenized.append(nltk.tokenize.word_tokenize(sent, language='english'))
    pos_sents = od()
    for i, sent_toks in enumerate(tokenized):
        pos_text = nltk.tag._pos_tag(sent_toks, None, tagger)
        joined_tags = [(pos[0], 'PUNC' if pos[1] not in NLTK_PENN else pos[1],
                        n) for n, pos in enumerate(pos_text)]
        pos_sents[i] = joined_tags
    model_queue.put([parsed_text, pos_sents])
Example #21
    def fill_mcmc_parameters(self):
        """
        Initializes the ordered dictionary :attr:`mcmc_parameters` from
        the input parameter file.

        It uses :meth:`read_file`, and calls
        :meth:`from_input_to_mcmc_parameters` to actually fill in
        :attr:`mcmc_parameters`.

        """

        # Define temporary quantities, only to simplify the input in the
        # parameter file
        self.parameters = od()

        # Read from the parameter file everything
        try:
            self.param_file = open(self.param, 'r')
        except IOError:
            io_mp.message(
                "Error in initializing the data class, the parameter file \
                {0} does not point to a proper file".format(self.param),
                "error")
        self.read_file(self.param_file)

        # Transform from the parameters dictionary to the mcmc_parameters dictionary
        # of dictionaries, method defined just below
        self.from_input_to_mcmc_parameters(self.parameters)
 def __init__(self, name, date, project, description=None):
     self.name = name
     self.project = project
     self.date = date
     self.description = description if description is not None else ""
     ana_name = self.date + "_" + self.name
     self.ana_dir = jn(project.path, "analyses",ana_name)
     logging.info("Analysis {} instantiated.".format(ana_name))
     assert os.path.isdir(self.ana_dir), ("{} does not exist, create "
                  "it by calling create_new_analysis()".format(self.ana_dir))
     if self.description:
         with open(jn(self.ana_dir,"README"),'w') as f:
             f.write(self.date+'_'+self.name+'\n')
             try:
                 f.write("Analysis notebook can be found in: " + \
                         callingframe.f_locals["_dh"])
             except:
                 pass
             f.write('-' * 60 + '\n')
             f.write(description + '\n')
             f.write('-' * 60 + '\n')
             f.write('Data produced by this analysis can be found in ./_data\n')
             f.write("See ./log and ./jobscripts for more information\n")
             f.write("In ./log each analysis step is documented in a .README file\n")
             #TODO: We should define a workflow chain and produce a log file for this.
            
     try:
         with open(jn(self.ana_dir,"log","steps.json")):
             pass
     except:
         pass
     self.steps = od()
Example #23
    def proxy_repr(self, pprint=False, outfile=True, json_path='./jsons/proxies'):
        """
        proxy_dict is an OrderedDict
        """
        proxy_dict = od()
        proxy_dict['sitename'] = self.sitename
        proxy_dict['coords'] = self.coords
        proxy_dict['season'] = self.season
        proxy_dict['dataset'] = self.dataset
        proxy_dict['variable'] = self.variable
        proxy_dict['calc_anoms'] = self.calc_anoms
        proxy_dict['detrend'] = self.detrend
        proxy_dict['value'] = self.value
        proxy_dict['climatology'] = self.climatology
        proxy_dict['period'] = self.period
        proxy_dict['extracted_coords'] = self.extracted_coords.tolist()
        proxy_dict['distance_point'] = self.distance_point
        proxy_dict['trend_params'] = self.trend_params
        proxy_dict['category'] = self.category
        proxy_dict['analog_years'] = self.analog_years.tolist()

        if pprint:
            pprint_od(proxy_dict)

        if outfile:
            proxy_name = self.sitename.replace(" ","_")
            proxy_name = proxy_name.replace(".","")
            #proxy_name =
            fname = "{}.json".format(self.sitename.replace(" ","_"))
            with open(os.path.join(json_path, fname),'w') as f:
                json.dump(proxy_dict, f)
        self.proxy_dict = proxy_dict
Example #24
def get_ngrams(txtblob_obj):
    """
    Calculate word and ngram counts for Graphs option.
    Calculate top n frequent words.
    Calculate top n 2-grams
    Calculate top n 3-grams

    Args:
        *txtblob_obj* (Blob) -- object containing parse results

    Returns:
        | *mostn* (list) -- a list of n most frequent words
        | *ngram2* (list) -- a list of n most frequent 2-grams
        | *ngram3* (list) -- a list of n most frequent 3-grams

    """
    counter = Counter(txtblob_obj[0].words)
    counts_dic = od(counter.most_common())
    tags_dic = dict(txtblob_obj[0].tags)
    # POS-tags included into most frequent words list
    include = ('JJ', 'JJR', 'JJS', 'NN', 'NNS', 'NNP', 'NNPS', 'VB', 'VBG')
    # get n most frequent words
    mostn = [(k, counts_dic[k])
             for k in counts_dic if tags_dic.get(k) in include][:10]
    ngram2_cnt = Counter([(n[0], n[1]) for n in txtblob_obj[0].ngrams(2)])
    ngram3_cnt = Counter([(n[0], n[1], n[2]) for n
                          in txtblob_obj[0].ngrams(3)])
    ngram2 = [(n[0], ngram2_cnt[n[0]]) for n in ngram2_cnt.most_common(10)]
    ngram3 = [(n[0], ngram3_cnt[n[0]]) for n in ngram3_cnt.most_common(10)]
    return mostn, ngram2, ngram3
Example #25
def parse_enhancements(params):
    '''Parse image enhancements and their parameters, if any.

    :param params: list of enhancement names and parameters
    :type params: list of strings
    :rtype: ordered dictionary

    example of *params*: ['usm:20,8', 'br']
    '''

    output = od()
    if len(params) > 0:
        LOGGER.debug("Parsing enhancements.")
    for param in params:
        param = param.split(":")
        if len(param) > 1:
            name, args = param
            args = args.split(',')
            arg_list = []
            for arg in args:
                try:
                    arg = float(arg)
                except ValueError:
                    pass
                arg_list.append(arg)
                
            output[name] = arg_list
        else:
            output[param[0]] = None

    return output
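
Following the docstring's example input, a sketch of the expected result:

print(parse_enhancements(['usm:20,8', 'br']))
# OrderedDict([('usm', [20.0, 8.0]), ('br', None)])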
def main(args):
    logger.info('loading pickle: %s' % args.pickle)

    uuids = od() 

    with open(args.uuids) as uuid_in:
        for line in uuid_in:
            uuid = line.strip().split()[0]
            uuids[uuid] = True

    insertions = []

    with open(args.pickle, 'r') as pickin:
        insertions = pickle.load(pickin)

    logger.info('finished loading %s' % args.pickle)
    logger.info('raw candidate count: %d' % len(insertions))

    filtered = []

    for ins in insertions:
        if ins['INFO']['ins_uuid'] in uuids:
            filtered.append(ins)

    logger.info('kept %d records' % len(filtered))

    with open(args.out, 'w') as pickout:
        pickle.dump(filtered, pickout)
def parse(fname):
    d = od()
    for line in open(fname):
        line = "".join([i if ord(i) < 128 else " " for i in line])
        toks = line.split()
        num, category, operator = toks[0], toks[1], " ".join(toks[2:])
        d[(category, operator)] = num
    return d
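
The input line format is undocumented; from the unpacking above, a hypothetical line and its effect:

# "42 network bring interface up"  ->  d[("network", "bring interface up")] = "42"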
Example #28
def parse_new(soup,filename):
    """
    parse a 2016 era webpage read from file filename

    Parameters
    ----------

    soup:  a beautiful soup object
    filename:  the original html filename (for the metadata)
    
    Returns
    -------

    keep_dict:  an ordered dictionary with metadata and temperature forecasts for the week
    
    """
    keep_dict=od()
    keep_dict['filename'] = filename
    #
    # get the metadata from the right column of the "about_forecast" table
    #
    info = soup.findAll("div", { "id" : "about_forecast" })
    lines=list(info[0].children)
    location=lines[1].findAll('div', {"class" : "right"})
    placename,latlon = list(location[0].children)
    keep_dict['placename'] = placename.strip()
    latlon = latlon.text.strip()
    #
    # change unicode degree sign to "deg"
    #
    latlon = latlon.replace('\u00b0',' deg ')
    keep_dict['location'] = latlon
    update=lines[3].findAll('div', {"class" : "right"})
    keep_dict['last_update'] = update[0].text.strip()
    valid=lines[5].findAll('div', {"class" : "right"})
    keep_dict['valid'] = valid[0].text.strip()
    #
    # get the days and temps from two columns of the forecast table
    #
    forecast_times=soup.findAll('div', {"class" : "col-sm-2 forecast-label"})
    forecast_temps=soup.findAll('div', {"class" : "col-sm-10 forecast-text"})
    keep_dict['temps'] = od()
    for the_time,the_temp in zip(forecast_times,forecast_temps):
        match = find_temp.match(the_temp.text)
        keep_dict['temps'][the_time.b.text] = int(match.group(1))
    return keep_dict
Example #29
	def load_mappings(self):
		self.mappings=od()
		mappings_path=QtCore.QDir("mappings:/")
		for i in mappings_path.entryList():
			if i in (".",".."):
				continue
			mapping=mappings_path.absoluteFilePath(i)
			self.mappings[os.path.splitext(i)[0].title()]=LetterMapping(mapping)
Example #30
 def od(self):
     """
     View Ami event as ordered dict (most useful).
     """
     event = self._event
     if hasattr(self, "_extra"):
         event += self._extra
     return od(line.t for line in event)
 def load_all(self):
     self.load_schemas()
     self.load_all_relations()
     self.load_functions()
     self.selectables = od()
     self.selectables.update(self.relations)
     self.selectables.update(self.functions)
     self.load_deps()
     self.load_deps_all()
Example #32
 def __init__(self,sequence='',name='DNA',validate_sequence=False):
     self.parial_dict = {
         't7':(r_compile('\w*?'+self.deg_replace('TAATACGACTCACTATAGN'),flags=ig),'NN'),
         'sp6':(r_compile('\w*?'+self.deg_replace('ATTTAGGTGACACTATAGN'),flags=ig),'N'),
         't3':(r_compile('\w*?'+self.deg_replace('AATTAACCCTCACTAAAGN'),flags=ig),'N'),
         'sgRNA_overlap':(r_compile(self.deg_replace('GTTTTAGAGCTAGAAN')+'*',flags=ig),''),
         'attB1R_primer': (r_compile('\w*?'+self.deg_replace('ACTGCTTTTTTGTACAAACTTG'),flags=ig),''),
         'attB1_primer': (r_compile('\w*?'+self.deg_replace('ACAAGTTTGTACAAAAAAGCAGGCT'),flags=ig),''),
         'attB2R_primer': (r_compile('\w*?'+self.deg_replace('ACCACTTTGTACAAGAAAGCTGGGT'),flags=ig),''),
         'attB2_primer': (r_compile('\w*?'+self.deg_replace('ACAGCTTTCTTGTACAAAGTGG'),flags=ig),''),
         'attB3R_primer': (r_compile('\w*?'+self.deg_replace('ACAACTTTGTATAATAAAGTTG'),flags=ig),''),
         'attB4_primer': (r_compile('\w*?'+self.deg_replace('ACAACTTTGTATAGAAAAGTTG'),flags=ig),'')}
     self.sequence = r_sub('[\W]','',sequence)
     self.valid_characters = {'A','T','C','G','N','H','D','V','B','K','M','Y','R','W','S'}
     if validate_sequence and not self.validate_sequence(self.sequence):
         raise self.SequenceError("Sequence contains invalid characters")
     self.name = name
     self.file_name = None
     self._save_path = plp.cwd()
     self.features = od()
     self.comments = []
     self.length = len(self.sequence)
     self._ape_header1 = 'LOCUS\nACCESSION\nVERSION\n'
     self._ape_header2 = 'COMMENT     ApEinfo:methylated:1\nFEATURES{}Location/Qualifiers\n'.format(' '*13)                 
     self._ape_feature = '     {7}{5}{0[0]}..{0[1]}{6}\n{4}/label={1}\n{4}/ApEinfo_fwdcolor="{2}"\n{4}/ApEinfo_revcolor="{3}"\n{4}/ApEinfo_graphicformat="arrow_data {{{{0 1 2 0 0 -1}} {{}} 0}}\n{4}width 5 offset 0"\n'
     self.color_dict = od(
     [('Cayenne', '#800000'), ('Asparagus', '#808000'), ('Clover', '#008000'), ('Teal', '#008080'), 
     ('Midnight', '#000080'), ('Plum', '#800080'), ('Tin', '#7F7F7F'), ('Nickel', '#808080'), 
     ('Mocha', '#804000'), ('Fern', '#408000'), ('Moss', '#008040'), ('Ocean', '#004080'), 
     ('Eggplant', '#400080'), ('Maroon', '#800040'), ('Steel', '#666666'), ('Aluminum', '#999999'), 
     ('Marascino', '#FF0000'), ('Lemon', '#FFFF00'), ('Spring', '#00FF00'), ('Turquoise', '#00FFFF'), 
     ('Blueberry', '#0000FF'), ('Magenta', '#FF00FF'), ('Iron', '#4C4C4C'), ('Magnesium', '#B3B3B3'), 
     ('Tangerine', '#FF8000'), ('Lime', '#80FF00'), ('SeaFoam', '#00FF80'), ('Aqua', '#0080FF'), 
     ('Grape', '#8000FF'), ('Strawberry', '#FF0080'), ('Tungsten', '#333333'), ('Silver', '#CCCCCC'), 
     ('Salmon', '#FF6666'), ('Banana', '#FFFF66'), ('Flora', '#66FF66'), ('Ice', '#66FFFF'), 
     ('Orchid', '#6666FF'), ('Bubblegum', '#FF66FF'), ('Lead', '#191919'), ('Mercury', '#E6E6E6'), 
     ('Cantaloupe', '#FFCC66'), ('Honeydew', '#CCFF66'), ('Spindrift', '#66FFCC'), ('Sky', '#66CCFF'), 
     ('Lavender', '#CC66FF'), ('Carnation', '#FF6FCF'), ('Licorice', '#000000'), ('Snow', '#FFFFFF'),
     ('black', 'black'), ('blue', 'blue'), ('brown', 'brown'), ('cyan', 'cyan'), ('green', 'green'), 
     ('magenta', 'magenta'), ('orange', 'orange'), ('purple', 'purple'), ('red', 'red'), 
     ('yellow', 'yellow'), ('white', 'white')]
     )    
     self._color_series = ['#FF0000', '#FFFF00', '#00FFFF', '#80FF00', '#0000FF', '#FF00FF', '#0080FF', '#FF8000', '#00FF80', '#8000FF', '#FF0080']
     self._color_pos = 0
Example #33
    def __getattr__(self, name):
        if name == "non_pk_constraints":
            a = self.i_from.constraints.items()
            b = self.i_target.constraints.items()
            a_od = od((k, v) for k, v in a if v.constraint_type != PK)
            b_od = od((k, v) for k, v in b if v.constraint_type != PK)
            return partial(statements_for_changes, a_od, b_od)

        elif name == "pk_constraints":
            a = self.i_from.constraints.items()
            b = self.i_target.constraints.items()
            a_od = od((k, v) for k, v in a if v.constraint_type == PK)
            b_od = od((k, v) for k, v in b if v.constraint_type == PK)
            return partial(statements_for_changes, a_od, b_od)

        elif name == "selectables":
            return partial(
                get_selectable_changes,
                od(sorted(self.i_from.selectables.items())),
                od(sorted(self.i_target.selectables.items())),
                self.i_from.enums,
                self.i_target.enums,
            )

        elif name == "triggers":
            return partial(
                get_trigger_changes,
                od(sorted(self.i_from.triggers.items())),
                od(sorted(self.i_target.triggers.items())),
                od(sorted(self.i_from.selectables.items())),
                od(sorted(self.i_target.selectables.items())),
                self.i_from.enums,
                self.i_target.enums,
            )

        elif name in THINGS:
            return partial(
                statements_for_changes,
                getattr(self.i_from, name),
                getattr(self.i_target, name),
            )

        else:
            raise AttributeError(name)
Example #34
def parse_html_template(html):
    '''
    Returns the evaluated html as an ordered dictionary of sections. This dictionary follows this format:
    
    <block_name> - the key to the section defined as the block name
        - <type> - the type of tag it is
        - <html> - a string that preserves the order of the childblocks and the innerhtml defined there
        - <child_blocks> (optional)- an ordered dictionary of blocks with the key being the name of the block
        - <parent> (optional) - the parent block that this rests in, creating a two way link between block and childblock
        - <tag> - the same as the key
        
    @param html: the template as a string
    '''
    sections = od()
    tag_stack = []
    out = ""
    in_html_tag = False
    i = 0
    while i < len(html):
        c = html[i]
        if is_start_tag(c, i, html):
            i, tag = parse_tag(i + 2, html)  #skip creation modulo
            out = add_tag(sections, tag_stack, tag, out)
        elif c == '%' and not in_html_tag:
            try:
                i, tag = parse_tag(i + 1, html)
            except SyntaxError:
                out += c  # a literal modulo is allowed; keep it and move on
                i += 1
                continue
            raise SyntaxError(
                "Modulo found outside of html tag and without an opening brace at char '{}' for tag '{}'."
                .format(i, tag))
        elif c == '<' and in_html_tag:
            raise SyntaxError(
                "Character < found inside of an html tag at char '{}'".format(
                    i))
        elif c == '<':
            in_html_tag = True
            i += 1
            out += c
        elif c == '>':
            in_html_tag = False
            i += 1
            out += c
        else:
            out += c
            i += 1

    if tag_stack:
        raise KeyError(
            "The following blocks did not have an end tag defined: '{}'".
            format(tag_stack))
    if sections["parent"]["type"] == "head":
        sections["foot"] = {"type": "foot", "html": "out"}

    return sections
Example #35
    def load_types(self):
        q = self.c.execute(self.TYPES_QUERY)

        def col(defn):
            return defn["attribute"], defn["type"]

        types = [
            InspectedType(i.name, i.schema, dict(col(_) for _ in i.columns)) for i in q
        ]  # type: list[InspectedType]
        self.types = od((t.signature, t) for t in types)
Example #36
 def generate_webms(self):
     print('Generating webms')
     for node in self.todo['webm']:
         node = node.split('.')[0]
         infile = os.path.join(ORIG, self.orig[node])
         outfile = os.path.join(COMPUTED, '{}.webm'.format(node))
         ff = ffmpy3.FFmpeg(inputs=od([(infile, None)]),
                            outputs={outfile: "-an -map 0:v -vf scale=640:360:force_original_aspect_ratio=decrease -b:v 900k -codec:v libvpx -auto-alt-ref 0"})
         ff.run()
     self.todo['webm'] = []
Example #37
 def info(self):
     from collections import OrderedDict as od
     info = od()
     info['id'] = self.id
     info['events'] = self.events
     info['tasks_in'] = self.tasks_in
     info['tasks_out'] = self.tasks_out
     info['pending_clients_demands'] = self.clients_demands_pending_answer
     info['pending_replies'] = self.pending_replies
     return info
Example #38
def ParseKaryotypes(karyotypes):
    """
    """
    karyoreader = reader(open(karyotypes), delimiter="\t")
    karyodict = od()

    for row in karyoreader:
        karyodict[row[1]] = [row[0], row[2], row[3]]

    return karyodict
Example #39
    def __init__(self,
                 filename=None,
                 header=None,
                 terms=None,
                 typedefs=None,
                 instances=None):
        self.filename = filename
        self.Terms = od()
        self.Terms.names = {}
        self.Typedefs = od()
        self.Typedefs.names = {}
        self.Instances = od()
        self.Instances.names = {}
        self.Headers = od()  #LOL STUPID FIXME
        self.Headers.names = {}  # FIXME do not want? what about imports?
        if filename is not None:  # FIXME could spec filename here?
            #od_types = {type_.__name__:type_od for type_,type_od in zip((Term, Typedef, Instance),(self.Terms,self.Typedefs,self.Instances))}
            #LOL GETATTR
            with open(filename, 'rt') as f:
                data = f.read()
            #deal with \<newline> escape
            data = data.replace(' \n',
                                '\n')  # FIXME need for arbitrary whitespace
            data = data.replace('\<newline>\n', ' ')
            # TODO remove \n!.+\n
            sections = data.split('\n[')
            header_block = sections[0]
            self.header = Header(header_block, self)
            stanzas = sections[1:]
            for block in stanzas:
                block_type, block = block.split(']\n', 1)
                type_ = stanza_types[block_type]
                #odt = od_type[block_type]
                t = type_(block, self)  # FIXME :/
                self.add_tvpair_store(t)

        elif header is not None:
            self.header = header
            self.Terms = terms  # TODO this should take iters not ods
            self.Typedefs = typedefs
            self.Instances = instances
        elif header is None:
            self.header = None
Example #40
    def __init__(self,
                 data=None,
                 altitude=None,
                 dtime=None,
                 var_name=None,
                 data_err=None,
                 var_unit=None,
                 altitude_unit=None,
                 **location_info):

        if data is None:
            data = []
        if data_err is None:
            data_err = []
        if dtime is None:
            dtime = []
        if altitude is None:
            altitude = []
        if var_name is None:
            var_name = 'data'

        self._var_name = None
        self._data = []
        self._data_err = []
        self._altitude = []
        self._vert_coord_name = None
        self._vert_coord_vals = od()

        self.var_info = BrowseDict()
        self.var_info['altitude'] = od()

        self.update(**location_info)

        self.var_name = var_name
        self.dtime = dtime
        self.data = data
        self.data_err = data_err
        self.altitude = altitude

        if var_unit is not None:
            self.var_unit = var_unit
        if altitude_unit is not None:
            self.altitude_unit = altitude_unit
Example #41
 def load_comments(self):
     q = self.c.execute(self.COMMENTS_QUERY)
     comments = [
         InspectedComment(schema=c.nspname,
                          object_type=c.objtype,
                          object_name=c.objname,
                          object_subname=c.objsubname,
                          comment=c.description) for c in q
     ]
     self.comments = od((i.key, i) for i in comments)
def generate_step(workflow_key, resource, state, check_mode, step_name=None):
    if step_name is None:
        step_name = 'Processing resource `%s` for state `%s` with check_mode `%s`' % (workflow_key, state, check_mode)
    step = od([
        ('name', step_name),
        ('include_tasks', od([
            ('file', 'tasks/nitro_resource_task.yaml'),
            ('apply', od([
                ('vars', od([
                    ('resource_name', workflow_key),
                    ('state', state),
                    ('check_mode', check_mode),
                    ('workflow_dict', '{{ workflow.%s }}' % workflow_key),
                    ('resource_attributes', copy.deepcopy(resource)),
                ])),
            ])),
        ])),
     ])
    return step
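
A minimal usage sketch with made-up arguments (the resource dict is purely illustrative):

step = generate_step('server', {'name': 'web01'}, state='present', check_mode='no')
print(step['name'])
# Processing resource `server` for state `present` with check_mode `no`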
Example #43
def create_basic_section_workflows(args, workflows):
    workflows['server'] = od([
        ('lifecycle', 'object'),
        ('endpoint', 'server'),
        ('primary_id_attribute', 'name'),
        ('resource_missing_errorcode', '258'),
        ('allow_recreate', 'true'),
        ('non_updateable_attributes', _get_non_updateable_attributes(args, 'server')),
    ])

    workflows['service'] = od([
        ('lifecycle', 'object'),
        ('endpoint', 'service'),
        ('primary_id_attribute', 'name'),
        ('resource_missing_errorcode', '344'),
        ('allow_recreate', 'true'),
        ('non_updateable_attributes', _get_non_updateable_attributes(args, 'service')),
    ])

    workflows['servicegroup'] = od([
        ('lifecycle', 'object'),
        ('endpoint', 'servicegroup'),
        ('primary_id_attribute', 'servicegroupname'),
        ('resource_missing_errorcode', '258'),
        ('allow_recreate', 'true'),
        ('non_updateable_attributes', _get_non_updateable_attributes(args, 'servicegroup')),
    ])

    workflows['service_lbmonitor_binding'] = od([
        ('lifecycle', 'binding'),
        ('endpoint', 'service_lbmonitor_binding'),
        ('bound_resource_missing_errorcode', '258'),
        ('primary_id_attribute', _get_bindig_id_attributes(args, 'service_lbmonitor_binding')[1]),
        ('delete_id_attributes', _get_bindig_id_attributes(args, 'service_lbmonitor_binding')[0]),
    ])

    workflows['servicegroup_lbmonitor_binding'] = od([
        ('lifecycle', 'binding'),
        ('endpoint', 'servicegroup_lbmonitor_binding'),
        ('bound_resource_missing_errorcode', '351'),
        ('primary_id_attribute', _get_bindig_id_attributes(args, 'servicegroup_lbmonitor_binding')[1]),
        ('delete_id_attributes', _get_bindig_id_attributes(args, 'servicegroup_lbmonitor_binding')[0]),
    ])
Example #44
def init_bindings(sig: Signature) -> od:
    bids = od([])
    for n, p in sig.parameters.items():
        if p.kind is Parameter.VAR_POSITIONAL:
            bids[n] = tuple()
        elif p.kind is Parameter.VAR_KEYWORD:
            bids[n] = dict()
        else:
            bids[n] = p.default
    return bids
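
A short sketch of what init_bindings() returns for a typical signature (parameters without a default map to inspect.Parameter.empty):

from inspect import signature

def f(a, b=2, *args, **kwargs):
    pass

print(init_bindings(signature(f)))
# OrderedDict([('a', <class 'inspect._empty'>), ('b', 2), ('args', ()), ('kwargs', {})])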
 def test_switcher_function(self):
     tests_path = os.path.join('test_data', 'switcher_function_tests.xml')
     for etr in load_xml_tests(tests_path).values():
         tup_arg = ast.literal_eval(etr.findtext('tup_arg'))
         remapped = od(ast.literal_eval(etr.findtext('remapped')))
         deptype = etr.findtext('deptype')
         govs = ast.literal_eval(etr.findtext('govs'))
         head = ast.literal_eval(etr.findtext('head'))
         self.assertEqual(
             hs.alg_controller(tup_arg, remapped, deptype, govs), head)
 def info_init(self):
     """Empty dictionary containing init values of infos to be
     extracted from filenames
     """
     return od(year=None,
               var_name=None,
               ts_type=None,
               vert_code='',
               is_at_stations=False,
               data_id='')
def copy_key(redis_client, ledis_client, key, convert=False):
    global entries
    k_type = redis_client.type(key)

    if k_type == "string":
        value = redis_client.get(key)
        ledis_client.set(key, value)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1

    elif k_type == "list":
        _list = redis_client.lrange(key, 0, -1)
        for value in _list:
            ledis_client.rpush(key, value)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1

    elif k_type == "hash":
        mapping = od(redis_client.hgetall(key))
        ledis_client.hmset(key, mapping)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1

    elif k_type == "zset":
        # dangerous to do this?
        out = redis_client.zrange(key, 0, -1, withscores=True)
        pieces = od()
        for i in od(out).iteritems():
            pieces[i[0]] = int(i[1])
        ledis_client.zadd(key, **pieces)
        set_ttl(redis_client, ledis_client, key, k_type)
        entries += 1

    elif k_type == "set":
        mbs = list(redis_client.smembers(key))

        if mbs is not None:
            ledis_client.sadd(key, *mbs)
            set_ttl(redis_client, ledis_client, key, k_type)
            entries += 1

    else:
        print "KEY %s of TYPE %s is not supported by LedisDB." % (key, k_type)
Example #48
def load_varconfig_ini(fpath):
    cfg = ConfigParser(allow_no_value=True)
    cfg.optionxform = str
    cfg.read(fpath)
    sections = cfg.sections()
    vals_raw = cfg._sections
    result = od()
    for key in sections:
        result[key] = list(vals_raw[key].keys())
    return result
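
A sketch of the expected behaviour for a hypothetical ini file (value-less keys are permitted because of allow_no_value=True):

# example.ini contents:
#   [modes]
#   clear
#   cloudy
#
# load_varconfig_ini('example.ini')  ->  OrderedDict([('modes', ['clear', 'cloudy'])])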
Example #49
    def has_compatible_columns(self, other):

        items = list(self.columns.items())

        if self.relationtype != "f":
            old_arg_count = len(other.columns)
            items = items[:old_arg_count]

        items = od(items)
        return items == other.columns
Example #50
    def mkpspl(self, pltrg, statement):
        '''
        Build a proportionally scaled profit-and-loss statement from the PL data.
        '''

        # Compute earnings
        if "earnings" not in statement.keys():
            statement["earnings"] = od([("営業利益",
                                         sum(statement["income"].values()) -
                                         sum(statement["expenses"].values()))])

        # Plot the P/L

        # Profitable company
        if sum(statement["earnings"].values()) > 0:

            # Earnings
            bottom = self._displayItems(pltrg, statement["earnings"], 1, 0,
                                        "#DCEDC8")

            # Expenses
            self._displayItems(pltrg, statement["expenses"], 1, bottom,
                               "#FFF9C4")

            # Income
            self._displayItems(pltrg, statement["income"], 2, 0, "#FFE0B2")

        # Loss-making company
        else:

            # Expenses
            self._displayItems(pltrg, statement["expenses"], 1, 0, "#FFF9C4")

            # Earnings
            bottom = self._displayItems(pltrg, statement["earnings"], 2, 0,
                                        "#DCEDC8")

            # Income
            self._displayItems(pltrg, statement["income"], 2, bottom,
                               "#FFE0B2")

        # title
        if self.pltitle:
            pltrg.set_title(self.pltitle)

        # x axis: hide ticks and tick labels
        pltrg.tick_params(labelbottom=False, bottom=False)

        # y axis: optionally hide ticks and tick labels
        if self.noylab:
            pltrg.tick_params(labelleft=False, left=False)

        # ylim
        if self.basis:
            pltrg.set_ylim((0, self.basis))
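# Shape sketch for the `statement` argument of mkpspl (hypothetical figures):
# "income" and "expenses" map item names to amounts; when "earnings" is
# missing, mkpspl fills it with operating profit = sum(income) - sum(expenses).
statement = od([
    ("income", od([("Sales", 1200)])),
    ("expenses", od([("Cost of sales", 700), ("SG&A", 300)])),
])
# mkpspl would then add: statement["earnings"] == od([("営業利益", 200)])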
Exemple #51
0
def dis_to_known_site_plot(in_isoform_res,
                           max_dis=30,
                           out_plot='dis_to_known.png'):
    ut.err_format_time(
        'dis_to_known_site_plot',
        'Plotting dis_known_site_plot for {} ... '.format(in_isoform_res))
    internal_dis_dict = dd(lambda: 0)
    back_dis_dict = dd(lambda: 0)
    dis_list = [i for i in range(-max_dis, max_dis + 1)]
    xlabel = 'Distance'
    ylabel = 'log10(Read count + 1)'

    with open(in_isoform_res) as in_fp:
        for line in in_fp:
            if line.startswith('#'): continue
            ele = line.rsplit()
            get_dis_to_known(ele, internal_dis_dict, back_dis_dict)
    for i in dis_list:
        internal_dis_dict[i] += 0
        back_dis_dict[i] += 0

    inter_od_dict = od([(i, math.log10(internal_dis_dict[i] + 1))
                        for i in dis_list])
    back_od_dict = od([(i, math.log10(back_dis_dict[i] + 1))
                       for i in dis_list])
    dis_dict = {
        'Internal splice-site': list(inter_od_dict.values()),
        'Back-splice-site': list(back_od_dict.values())
    }
    # print dis_dict
    dkp.dis_to_known_plot(out_fig=out_plot,
                          in_dict=dis_dict,
                          subgroup=[1, 1],
                          title='Distance to known splice-site',
                          xticks=dis_list,
                          xlabel=xlabel,
                          ylabel=ylabel)
    ut.err_format_time(
        'dis_to_known_site_plot',
        'Plotting dis_known_site_plot for {} done!'.format(in_isoform_res))
    return
 def __init__(self, **kwargs):
     self._head_fix = od(num_head_lines = np.nan,
                         num_head_fmt = np.nan,
                         data_originator = "",
                         sponsor_organisation = "",
                         submitter = "",
                         project_association = "",
                         vol_num = np.nan,
                         vol_totnum = np.nan,
                         ref_date = np.nan,
                         revision_date = np.nan,
                         freq = np.nan,
                         descr_time_unit = "",
                         num_cols_dependent = np.nan,
                         mul_factors = [],
                         vals_invalid = [],
                         descr_first_col = "")
     self._var_defs = []
     self._meta = od()
     self.update(**kwargs)
def load_xml_tests(test_name):
    '''This function reads test_name xml file and returns an ordered dict of
    parsed etree objects'''
    with open(test_name, 'r') as f:
        fdata = f.read()
    etrees = od()
    for i, sent in enumerate(fdata.split('\n\n')):
        if not sent:
            continue
        etrees[i] = et.fromstring(sent)
    return etrees
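# Usage sketch for load_xml_tests (hypothetical test file): every
# blank-line-separated block is parsed as one XML fragment and keyed by its
# position in the file.
with open("dep_tests.xml", "w") as f:
    f.write("<sent><deptype>nsubj</deptype></sent>\n\n"
            "<sent><deptype>dobj</deptype></sent>\n")

trees = load_xml_tests("dep_tests.xml")
print(len(trees), trees[0].findtext("deptype"))  # 2 nsubj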
Exemple #54
0
    def __init__(self, forms_dict=None):
        """Class initialisation."""
        if forms_dict is None:
            forms_dict = {}
        self._forms = od()
        self.id_count = 0

        self.type = ""

        for key, val in six.iteritems(forms_dict):
            self[key] = val
Exemple #55
0
 def zero(self, rail_h, cb_menu_asks_display_to_clear,
          cb_menu_asks_display_to_write, cb_menu_selection, height, width,
          title):
     self.rail_h = rail_h
     self.cb_menu_asks_display_to_clear = cb_menu_asks_display_to_clear
     self.cb_menu_asks_display_to_write = cb_menu_asks_display_to_write
     self.height = height
     self.width = width
     self.title = title
     #
     self.d_menu = od()
def getHashSums(file_path):
    hashSums = od()
    hashSums['md5sum'] = hashlib.md5()
    hashSums['sha1sum'] = hashlib.sha1()
    hashSums['sha224sum'] = hashlib.sha224()
    hashSums['sha256sum'] = hashlib.sha256()
    hashSums['sha384sum'] = hashlib.sha384()
    hashSums['sha512sum'] = hashlib.sha512()

    with open(file_path, 'rb') as fd:
        dataChunk = fd.read(1024)  # read 1 KiB at a time
        while dataChunk:
            for hashsum in hashSums.keys():
                hashSums[hashsum].update(dataChunk)
            dataChunk = fd.read(1024)

    results = od()
    for key, value in hashSums.items():
        results[key] = value.hexdigest()
    return results
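# Usage sketch: hash this script itself and print one line per checksum.
if __name__ == "__main__":
    for name, digest in getHashSums(__file__).items():
        print("%s  %s" % (name, digest))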
Exemple #57
0
    def apply_changes_rename(self):

        df = self.df_edit
        mapping = od()

        for i, name in enumerate(self.run_names):
            repl = str(self.input_fields_rename[i].value)
            mapping[name] = repl
        self.df_edit = df.rename(index=mapping, level=self.run_level_idx)
        self.output.append_display_data(
            "Applying renaming: {}".format(mapping))
Exemple #58
0
    def apply_changes(self):

        df = self.df
        mapping = od()

        for i, name in enumerate(self.names):
            repl = str(self.input_fields[i].value)
            mapping[name] = repl
        self._df_edit = df.rename(index=mapping, level=self.level)

        self.disp_current()
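# Minimal standalone illustration of the rename calls in the two examples above
# (hypothetical frame): passing a mapping together with `level` renames labels
# only on that MultiIndex level, leaving the other levels untouched.
import pandas as pd

df = pd.DataFrame({"val": [1, 2]},
                  index=pd.MultiIndex.from_tuples([("run1", "a"), ("run2", "a")],
                                                  names=["run", "station"]))
print(df.rename(index={"run1": "exp1"}, level="run"))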
Exemple #59
0
def _fake_import_specs():
    """Returns dictionary for adding a new fake import type"""
    return od([("type", "fake"),
               ("access_type", "col_index"),
               ("file_type", "csv"),
               ("time_str_formats", "%Y%m%d%H%M"),
               ("delim", ";"),
               ("start", 0), #col num
               ("stop", 1), #col num
               ("bla" , "Blub"), #invalid (for test purpose)
               ("num_scans", 4)]) #colnum
Exemple #60
0
    def compute(self, recs, truth, *, progress=lambda x: x):
        """
        Run the analysis.  Neither data frame should be meaningfully indexed.

        Args:
            recs(pandas.DataFrame):
                A data frame of recommendations.
            truth(pandas.DataFrame):
                A data frame of ground truth (test) data.

        Returns:
            pandas.DataFrame: The results of the analysis.
        """
        _log.info('analyzing %d recommendations (%d truth rows)', len(recs), len(truth))
        gcols = self.group_cols
        if gcols is None:
            gcols = [c for c in recs.columns if c not in self.DEFAULT_SKIP_COLS]
        _log.info('using group columns %s', gcols)
        _log.info('ungrouped columns: %s', [c for c in recs.columns if c not in gcols])
        gc_map = dict((c, i) for (i, c) in enumerate(gcols))

        ti_cols = [c for c in gcols if c in truth.columns]
        ti_cols.append('item')

        _log.info('using truth ID columns %s', ti_cols)
        truth = truth.set_index(ti_cols)
        if not truth.index.is_unique:
            warnings.warn('truth frame does not have unique values')
        truth.sort_index(inplace=True)

        _log.info('preparing analysis result storage')
        # we manually use grouping internals
        grouped = recs.groupby(gcols)

        res = pd.DataFrame(od((k, np.nan) for (f, k, args) in self.metrics),
                           index=grouped.grouper.result_index)
        assert len(res) == len(grouped.groups), \
            "result set size {} != group count {}".format(len(res), len(grouped.groups))
        assert res.index.nlevels == len(gcols)

        _log.info('computing analysis for %d lists', len(res))
        for i, row_key in enumerate(progress(res.index)):
            g_rows = grouped.indices[row_key]
            g_recs = recs.iloc[g_rows, :]
            if len(ti_cols) == len(gcols) + 1:
                tr_key = row_key
            else:
                tr_key = tuple([row_key[gc_map[c]] for c in ti_cols[:-1]])

            g_truth = truth.loc[tr_key, :]
            for j, (mf, mn, margs) in enumerate(self.metrics):
                res.iloc[i, j] = mf(g_recs, g_truth, **margs)

        return res
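# Hedged sketch of a metric usable with the compute loop above (the signature
# is inferred from the call `mf(g_recs, g_truth, **margs)`): each metric gets
# one group of recommendations, the matching truth slice and any stored keyword
# arguments, and returns a single number for the result frame.
def list_length(g_recs, g_truth, **kwargs):
    """Trivial metric: the number of recommendations in the list."""
    return len(g_recs)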