Example #1
def get_contact_timeline(id,type):
    db = init_db()
    contact = db.query(profile).filter(profile.id==id).first()
    if contact is None:
        return False
    temp = defaultdict(dict)
    temp['label'] = type
    temp1 = defaultdict(int)
    if type in ["email","chat"]:
        for chats in contact.chats:
            if type == "email":
                if chats.type == CHAT_EMAIL:
                    temp1[int(chats.ts*1000)] +=1
            if type == "chat":
                if chats.type not in [CHAT_EMAIL,CHAT_SOCIALMEDIA,CHAT_TWITTER]:
                    temp1[int(chats.ts*1000)] +=1
    if type == "feedback":
        for feedback in contact.feedbacks:
            temp1[int(feedback.ts*1000)] += 1
    if type=="twitter":
        tweets = db.query(tweet).filter(tweet.mentioner==contact.twitter)
        for obj in tweets:
            temp1[int(obj.ts*1000)] += 1

    temp['data'] = sorted(temp1.items())
    print(temp)
    db.close()
    return temp
Example #2
 def __init__(self, cols=[], rows=[]):
     super(Table, self).__init__()
     self.cols = cols
     self.rows = rows
     self.cells = defaultdict(lambda : defaultdict(Decimal))
     self.col_totals = defaultdict(Decimal)
     self.row_totals = defaultdict(Decimal)
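A minimal sketch of how an add() method might accumulate into these defaultdicts, mirroring how Example #3 calls total_table.add(month, category, amount); the add() method and the sample values below are assumptions, not part of the original class:

from collections import defaultdict
from decimal import Decimal

class Table(object):
    def __init__(self, cols=None, rows=None):
        self.cols = cols or []
        self.rows = rows or []
        # cell values default to Decimal('0'), so += works on the first touch
        self.cells = defaultdict(lambda: defaultdict(Decimal))
        self.col_totals = defaultdict(Decimal)
        self.row_totals = defaultdict(Decimal)

    def add(self, col, row, amount):
        # hypothetical accumulator: no key checks needed thanks to defaultdict
        self.cells[row][col] += amount
        self.col_totals[col] += amount
        self.row_totals[row] += amount

t = Table(cols=['JAN', 'FEB'])
t.add('JAN', 'Groceries', Decimal('12.50'))
t.add('JAN', 'Groceries', Decimal('7.00'))
print(t.cells['Groceries']['JAN'])   # 19.50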
Example #3
def _get(request, catfn=lambda tx:tx.entity.category):
    title = 'All Transactions'
    account_id = None
    entity_id = None
    category_id = None
    amount_filter = 0.00
    
    if request.GET:
        account_id = request.GET.get('account_id', None)
        entity_id = request.GET.get('entity_id', None)
        category_id = request.GET.get('category_id', None)
        amount_filter = float(request.GET.get('amount', amount_filter))
        
    
    transactions = Transaction.objects.filter(posted__gte=first_of_year).filter(Q(amount__gte=amount_filter) | Q(amount__lte=(amount_filter*-1.0)))
    if account_id:
        title = 'Account = {}'.format(Account.objects.get(id=account_id).heading)
        transactions = transactions.filter(account__id=account_id)
    if entity_id:
        title = 'Entity = {}'.format(Entity.objects.get(id=entity_id).heading)
        catfn=lambda tx:tx
        transactions = transactions.filter(entity__id=entity_id)
    if category_id:
        title = 'Category = {}'.format(Category.objects.get(id=category_id).heading)
        catfn=lambda tx:tx.entity
        transactions = transactions.filter(entity__category__id=category_id)
    
    total_table = Table(cols=months)
    
    for tx in transactions:
        month = tx.posted.strftime('%b').upper()
        amount = tx.amount
        category = catfn(tx)
        total_table.add(month, category, amount)

    accounts = Account.objects.all()
    balances = Balance.objects.all()
    total = sum([t.amount for t in transactions])
    
    balance_dates = set([b.as_of_date for b in balances])
    balance_amounts = defaultdict(lambda : defaultdict(Decimal))
    balance_totals = defaultdict(Decimal)
    for balance in balances:
        date = balance.as_of_date
        amount = balance.signed_amount
        account = balance.account
        balance_dates.add(date)
        balance_amounts[date][account] += amount
        balance_totals[date] += amount
    balance_dates = sorted(list(balance_dates))
    
    return render_to_response('index.html', {'title':title,
                                             'transactions':transactions,
                                             'accounts':accounts,
                                             'balances':balances,
                                             'totals':total_table,
                                             'balance_dates' : balance_dates,
                                             'balance_amounts' : balance_amounts,
                                             'balance_totals' : balance_totals,
                                             'total':total}, RequestContext(request))
Example #4
def A_priori(infile, support):
    infile = getDataFromFile(infile)  # run the getDataFromFile routine
    lineList = list()  # list of baskets, one frozenset per input line
    Items = set()  # set of every item that appears in the file; using a set drops duplicate entries, so no item is stored twice
    for line in infile:
        # a frozenset cannot be modified after creation
        lineList.append(frozenset(line))  # append each line of the file opened by getDataFromFile
        for item in line:
            Items.add(frozenset([item]))  # add every word of the line to Items

    All_freqSet = defaultdict(int)  # dict subclass that will hold every itemset together with its number of occurrences in the baskets (the lines)
    freqk = defaultdict(int)  # holds the itemset frequencies of the current pass over all baskets
    allset = dict()
    k = 2
    # first pass of the A_Priori algorithm
    first_pass, allset_temp = FindItemsFirstTime(lineList, Items, support, freqk)  # first_pass and allset_temp receive the results of FindItemsFirstTime
    freqk = first_pass  # store the first pass in the freqk dict
    # as long as the freqk dict is non-empty, do the following:
    while bool(freqk):
        All_freqSet.update(freqk)  # add the itemset frequencies found on this pass to All_freqSet
        allset[k-1] = allset_temp  # allset_temp, returned by the previous pass, holds the itemsets that passed the minimum-support check
        anotherpass, allset_temp = AprioriPass(lineList, freqk, support, k)  # run AprioriPass, the subsequent steps of A_Priori; results go into anotherpass and allset_temp
        freqk = anotherpass  # freqk becomes the new frequency set from anotherpass
        k += 1  # grow the size of the candidate itemsets
    # build final_freq, a list holding the itemsets found by A_Priori together with their frequencies
    final_freq = []

    for key, value in All_freqSet.items():
        final_freq.extend([tuple(key), All_freqSet[key]])  # final_freq holds each itemset followed by its occurrence count in the baskets

    return final_freq, allset  # return final_freq with the itemset frequencies and allset, which holds every itemset produced while running A_Priori
Example #5
 def __init__(self, problem):
     self.problem = problem
     self.s = None
     self.a = None
     self.untried = defaultdict(list)
     self.unbacktracked = defaultdict(list)
     self.result = {}
Example #6
    def learning_dictionary(self,data):
        dict_count_first_word=defaultdict(int)
        dict_count_each_word=defaultdict(int)
        dict_count_each_part_of_speech=defaultdict(int)
        dict_count_part_of_speech_CP=defaultdict(int)
        dict_count_word_part_of_speech=defaultdict(int)
        total_number_of_words=0
        for i in range(0,len(data)):
            dict_count_first_word[data[i][0][0]]=dict_count_first_word[data[i][0][0]]+1
            for j in range(0,len(data[i][0])):
                total_number_of_words+=1
                dict_count_each_word[data[i][0][j]]=dict_count_each_word[data[i][0][j]]+1
                dict_count_each_part_of_speech[data[i][1][j]]=dict_count_each_part_of_speech[data[i][1][j]]+1
                if j<len(data[i][0])-1:
                    CP_part_of_speech=data[i][1][j]
                    CP_part_of_speech+="-"
                    CP_part_of_speech+=data[i][1][j+1]
                    dict_count_part_of_speech_CP[CP_part_of_speech]=dict_count_part_of_speech_CP[CP_part_of_speech]+1


                    Word_part_of_speech=data[i][0][j]
                    Word_part_of_speech+="-"
                    Word_part_of_speech+=data[i][1][j]
                    dict_count_word_part_of_speech[Word_part_of_speech] = dict_count_word_part_of_speech[Word_part_of_speech]+1

        return dict_count_first_word,dict_count_each_word,dict_count_each_part_of_speech,dict_count_part_of_speech_CP,dict_count_word_part_of_speech,total_number_of_words
Example #7
def load_big_from_figer_out(figerfile, numtype, myt2i):
    logger.info('loading figer results in a big matrix from %s', figerfile)
    f = open(figerfile)
    c = 0
    big = defaultdict(lambda: defaultdict(list))
    for line in f:
        parts = line.strip().split('\t')
        if len(parts) < 3:
            continue
        if not parts[1].startswith('B-'):
            continue
        
        emid = parts[2].strip()
#         
        scores = [0.0 for i in range(numtype)]
        for i in range(3, len(parts)):
            (mytype, score) = parse_one_type(parts[i])
            if mytype not in myt2i:
                continue
            scores[myt2i[mytype]] = score
        assert len(scores) == numtype
        scores = mynormalize(scores)      
        for j in range(numtype):
            big[emid][j].append(scores[j])
        c +=1
        if c == upto:
            break
#         logger.info('entity number: %d', c)
    logger.info('big has %d entities', len(big))
    return big
Example #8
def PRiter(iterations=20):
    iter_time = time.time()

    nodes = NodeBuilder()
    iter_nodelist = defaultdict(lambda: 0.15)
    cur_nodelist = defaultdict(lambda: 0.85)


    print "iterations started", time.time() - iter_time

    for iter in range(iterations):

        iter_time = time.time()

        for data in files:
            checker(*data, dd=nodes, cur_nodelist=cur_nodelist, iter_nodelist=iter_nodelist)
        cur_nodelist, iter_nodelist = iter_nodelist, cur_nodelist
        iter_nodelist = defaultdict(lambda : 0.15)

        print(time.time() - iter_time, "for iteration", iter)



    sorted_nodes_top1000 = sorted(cur_nodelist.items(), key=itemgetter(1), reverse=True)[:1000]
    for name in sorted_nodes_top1000:
        if name[0][0] == "c":
            print(name)
Example #9
def find_vm_addr(trace):
    """
    Find the virtual machine addr
    :param trace: instruction trace
    :return: virtual function start addr
    """
    push_dict = defaultdict(lambda: 0)
    vm_func_dict = defaultdict(lambda: 0)
    # try to find the vm Segment via series of push commands, which identify the vm_addr also
    for line in trace:
        try:
            if line.disasm[0] == 'push':
                push_dict[GetFunctionAttr(line.addr, FUNCATTR_START)] += 1
        except:
            pass

    vm_func = max(push_dict, key=push_dict.get)
    vm_seg_start = SegStart(vm_func)
    vm_seg_end = SegEnd(vm_func)
    # test whether the vm_func is the biggest func in the Segment
    vm_funcs = Functions(vm_seg_start, vm_seg_end)
    for f in vm_funcs:
        vm_func_dict[f] = GetFunctionAttr(f, FUNCATTR_END) - GetFunctionAttr(f, FUNCATTR_START)
    if max(vm_func_dict, key=vm_func_dict.get) != vm_func:
        return AskAddr(vm_func,
                "Found two possible addresses for the VM function start address: %s and %s. Choose one!" %
                (vm_func, max(vm_func_dict, key=vm_func_dict.get)))
    else:
        return vm_func
Example #10
 def __init__(self, file_url):
     self.file_url = file_url
     self.file_url_out = file_url + ".out"
     self.dist = None
     self.users = None
     self.feeds = None
     self.movements = defaultdict(list)#LocationRecord
     self.moveint = defaultdict(list)#LocationRecord
Example #11
def load_big_matrix(big_file):
    type2entprobs = defaultdict(lambda: defaultdict(list))
    with open(big_file) as fp:
        for line in fp:
            parts = line.split()
            for i, p in enumerate(parts[1:]):
                type2entprobs[i][parts[0]].append(float(p))
    logger.info('loading the big matrix %s finished', big_file)
    return type2entprobs
Example #12
def calc_contacts_per_cluster_per_motif(contacts_per_cluster, protein_motifs, ordered_motifs, weight):
    contacts_per_motif = defaultdict(lambda: defaultdict(int))
    for motif_name in ordered_motifs:
        if motif_name in protein_motifs:
            for cluster_id in contacts_per_cluster: 
                for res_id in contacts_per_cluster[cluster_id]:
                    res = int(res_id.split(":")[0]) 
                    if res >= protein_motifs[motif_name][0] and res < protein_motifs[motif_name][1]:
                        contacts_per_motif[cluster_id][motif_name] += 1/weight  
    return contacts_per_motif
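A short hedged usage sketch; the "residue:chain" res_id format and the motif ranges below are invented for illustration:

contacts_per_cluster = {0: ['5:A', '12:A'], 1: ['12:A']}
protein_motifs = {'loop1': (1, 10), 'helix2': (10, 20)}
ordered_motifs = ['loop1', 'helix2']
result = calc_contacts_per_cluster_per_motif(contacts_per_cluster, protein_motifs, ordered_motifs, weight=2)
print({k: dict(v) for k, v in result.items()})
# {0: {'loop1': 0.5, 'helix2': 0.5}, 1: {'helix2': 0.5}} -- each contact adds 1/weight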
Example #13
 def test_keyerror_without_factory(self):
     from _collections import defaultdict
     for d1 in [defaultdict(), defaultdict(None)]:
         for key in ['foo', (1,)]:
             try:
                 d1[key]
             except KeyError, err:
                 assert err.args[0] == key
             else:
                 assert 0, "expected KeyError"
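For reference, the same behaviour with the standard-library defaultdict on Python 3 (a small sketch, separate from the _collections test above):

from collections import defaultdict

# without a default_factory, missing keys raise KeyError just like a plain dict
d = defaultdict(None)
try:
    d['foo']
except KeyError as err:
    assert err.args[0] == 'foo'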
Example #14
def read_fa(data)-> dict:
    DATA = data.read().split()
    myDict = defaultdict(defaultdict)
    for e in DATA:
        D = e.split(';')
        tempDict = defaultdict(str)
        for j in range(1, len(D), 2):
            tempDict[D[j]] = D[j+1]
        myDict[D[0]] = tempDict
    return myDict
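A quick hedged usage sketch; the "key;attr;value;attr;value" token format is inferred from the parsing loop above, and io.StringIO stands in for the file handle:

import io

sample = io.StringIO("q0;a;q1;b;q2 q1;a;q0")
fa = read_fa(sample)
print(fa['q0']['a'])   # 'q1'
print(fa['q1']['a'])   # 'q0'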
Example #15
    def probability_dictionary(self,data):
        different_parts_of_speech=['ADJ','ADV','ADP','CONJ','DET','NOUN','NUM','PRON','PRT','VERB','X','.']
        part_of_speech_probability=defaultdict(int)
        sum_of_parts_of_speech=sum(self.dict_count_part_of_speech_CP.values())
        for i in range(0,len(different_parts_of_speech)):
            for j in range(0,len(different_parts_of_speech)):
                #if(different_parts_of_speech[i]!=different_parts_of_speech[j]):
                    #self.dict_count_part_of_speech_CP={} #e.g.,(dict["noun-verb"]=5)
                    temp_part_of_speech=""
                    temp_part_of_speech+="cp_"
                    temp_part_of_speech+=different_parts_of_speech[i].lower()
                    temp_part_of_speech+="|"
                    temp_part_of_speech+=different_parts_of_speech[j].lower()
                    a_int_b=self.dict_count_part_of_speech_CP[different_parts_of_speech[j].lower()+"-"+different_parts_of_speech[i].lower()]
                    prob_a_int_b=a_int_b/sum_of_parts_of_speech
                    #[p("verb")=0.5]
                    prob_part_of_speech=float(self.dict_count_each_part_of_speech[different_parts_of_speech[j].lower()])/float(self.total_number_of_words)
                    self.dict_prob_each_part_of_speech[different_parts_of_speech[j].lower()]=prob_part_of_speech
                    if(prob_part_of_speech!=0):
                        part_of_speech_probability[temp_part_of_speech]=prob_a_int_b/prob_part_of_speech
                    else:
                        part_of_speech_probability[temp_part_of_speech]=0.0005


        # self.dict_count_word_part_of_speech={}#e.g., (dict[hari-noun]=6)
        #self.dict_count_each_word(dict[hari]=6)
        #Word_part_of_speech=data[i][0][j]
        #Word_part_of_speech+="-"
        #Word_part_of_speech+=data[i][1][j]
        word_probability=defaultdict(int)

        for i in range(0,len(data)):
            for j in range(0,len(data[i][0])):
                word=data[i][0][j]
                for k in range(0,len(different_parts_of_speech)):
                    count_word=self.dict_count_each_word[word]
                    count_part_of_speech=self.dict_count_each_part_of_speech[different_parts_of_speech[k]]

                    count_word_int_part_of_speech=self.dict_count_word_part_of_speech[word+"-"+different_parts_of_speech[k].lower()]
                    probability_of_part_of_speech=self.dict_prob_each_part_of_speech[different_parts_of_speech[k].lower()]
                    temp_word=""
                    temp_word+="cp_"
                    temp_word+=word
                    temp_word+="|"
                    temp_word+=different_parts_of_speech[k].lower()
                    if(probability_of_part_of_speech!=0 and count_part_of_speech!=0):
                        word_probability[temp_word]=(float(count_word_int_part_of_speech)/float(count_part_of_speech))/probability_of_part_of_speech
                    else:
                        word_probability[temp_word]=0.0005

        #print part_of_speech_probability
        #print word_probability
        return part_of_speech_probability,word_probability
Example #16
 def loadViewSelectionPlan(cls,filename):    
     inputfile = open(filename,"r")
     feedMap = {}
     userMap = {}
     viewMap = {}
     feedviewMap = defaultdict(set)
     queryPlan = defaultdict(set)
     
     lcount = 0
     totFeed = 0
     totUser = 0
     totView = 0
     totFvm = 0
     totQp = 0
     for line in inputfile:
         if lcount == 0:
             elem = line.strip().split("\t")
             totFeed = int(elem[0])
             totUser = int(elem[1])
             totView = int(elem[2])
             totFvm = int(elem[3])
             totQp = int(elem[4])
         elif lcount <= totFeed:
             f = FeedInfo.loadFromLine(line)
             feedMap[f.feed_id] = f
         elif lcount <= totFeed+totUser:
             u = UserInfo.loadFromLine(line)
             userMap[u.user_id] = u
         elif lcount <= totFeed+totUser+totView:
             v = View.loadFromLine(line, feedMap, userMap)
             viewMap[v.view_id] = v
         elif lcount <= totFeed+totUser+totView+totFvm:
             elem = line.strip().split("\t")
             fid = elem[0]
             viewset = set()
             for i in range(1,len(elem)):
                 viewset.add(viewMap[elem[i]])
             feedviewMap[fid]=viewset
         elif lcount <= totFeed+totUser+totView+totFvm+totQp:
             elem = line.strip().split("\t")
             uid = elem[0]
             viewset = set()
             for i in range(1,len(elem)):
                 viewset.add(viewMap[elem[i]])
             queryPlan[userMap[uid]] = viewset
         else:
             print("Error Line: " + line)
         lcount += 1
     
     inputfile.close()
     Plan = collections.namedtuple('Plan',['ViewMap','FeedMap','UserMap','FeedViewMap','QueryPlan'])
     P = Plan(ViewMap=viewMap,FeedMap=feedMap,UserMap=userMap,FeedViewMap=feedviewMap,QueryPlan=queryPlan)
     return P
Example #17
def features(docList):
    import time

    
    # download model (only needs to be done once)
    model_dir = download_and_install_model('WSJ', '/tmp/models')
    # Loading the model is slow, but only needs to be done once
    rrp = RerankingParser.from_unified_model_dir(model_dir)
    rrp.set_parser_options(nbest = 5)
    features = []
    scores = []
    with open("output_log.txt", "w") as logF, open("syn_feats.pkl", "w")  as synFile, open("syn_scores.pkl", "w")  as scoresFile:

        for i, doc in enumerate(docList):
            start_time = time.time()

            features.append(defaultdict(float))
            scores.append(defaultdict(list))

            for sentence in doc:
                
                parses = rrp.parse(sentence, rerank=False)
                #print(len(parses))
                #print(sentence, file = logF)
                try:
                    parse_score = parses[0].parser_score
                    rerank_score = parses[0].reranker_score
                    scores[i]['parse'].append(parse_score)
                    scores[i]['rerank'].append(rerank_score)
                    scores[i]['sent_length'].append(len(parses[0].ptb_parse.tokens()))
    
                    best_parse = parses[0].ptb_parse
                    # print(best_parse, file = logF)
                
                    for t in best_parse.all_subtrees():
                        levels = buildSubtrees(t)
                        for l in levels:
                            features[i][l] += 1.0
                except:
                    print("No parse available - skipping")
            features[i] = {x:v for x,v in features[i].items()}
            print("{0}".format(sorted(features[i].items(), key=operator.itemgetter(1), reverse=True)), file = logF)
            print("--- {0} seconds for {1} sentences ---" .format(time.time() - start_time, len(doc)))

        pickle.dump(features, synFile)
        pickle.dump(scores, scoresFile)


#     t_bllip = Timer(lambda: rrp.parse(sentence))
#     print ("bllip", t_bllip.timeit(number=5))
    
    pass
Example #18
def main():
        
    emit = defaultdict(int)
    transition = defaultdict(int)
    context = defaultdict(int)
    states = set()
    start = defaultdict(int)    
    wordToTag = dict()
    
    output = open("hmmoutput.txt", 'w')
    totalStart = 0
    with open("hmmmodel.txt") as f:
        for line in f:
            if line.startswith("T"):
                temp = line.rstrip("\n").split(" ")
                transition[temp[1].strip()+" "+temp[2].strip()] = float(temp[4].strip())
                if temp[1].strip().startswith("<s>"):
                    start[str(temp[2].strip())] += int(temp[3].strip())
                    totalStart += int(temp[3].strip())
            
            elif line.startswith("E"):
                temp = line.rstrip("\n").split(" ")
                emit[temp[1].strip()+" "+temp[2].strip()] = float(temp[3].strip())
            
            elif line.startswith("C"):
                temp = line.rstrip("\n").split(" ")
                context[temp[1].strip()] = int(temp[2].strip())
                states.add(temp[1].strip())
                
            elif line.startswith("W"):
                temp = line.rstrip("\n").split(" ")
                word = temp[1].strip()
                if word not in wordToTag:
                    wordToTag[word] = set()
                tags = temp[2].strip().rstrip(",").strip()
                tags = tags.split(",")
                for tag in tags:
                    wordToTag[word].add(tag.strip())
    
    for i in states:
        start[i] = start[i] * totalStart


    with open(sys.argv[1]) as f:
        for line in f:
            obs = line.rstrip("\n").split(" ")
        
            listOfTags = viterbi(obs, states,  start, transition, emit, context, wordToTag)
            for i in range(len(listOfTags)-1, -1, -1):
                output.write(str(listOfTags[i])+" ")
            output.write("\n")
Example #19
def main():
    
    emit = defaultdict(int)
    transition = defaultdict(int)
    context = defaultdict(int)
    wordToTag = dict()
    states = set()
    
    result=open('hmmmodel.txt', 'w')
    
    with open(sys.argv[1]) as f:
        for line in f:
            previous = "<s>"
            context[previous] += 1
    
            wordtags = line.strip().split(" ")
            for wordtag in wordtags:
                word = wordtag[:len(wordtag)-3]
                tag = wordtag[-2:]    
                transition[previous+" "+tag] += 1
                context[tag] += 1
                emit[tag+" "+word] += 1
                previous = tag
                
                if word not in wordToTag:
                    wordToTag[word] = set()
                    
                wordToTag[word].add(tag)  
                states.add(tag)  
            transition[previous+" </s>"] += 1
            
        
    numberOfStates = len(states)
    for key in transition:
        previous, tag = key.split(" ")
        result.write("T " + key + " " + str(transition[key])+ " " + str((transition[key] + 1)/(context[previous] + numberOfStates)) +"\n")
        
    for key in emit:
        tag, word = key.split(" ")
        result.write("E " + key + " " + str(emit[key]/ context[tag]) + "\n")
        
    for key in context:
        result.write("C " + key + " " + str(context[key]) + "\n")
    
    for key in wordToTag:
        s = "W " + key + " "
        for tag in wordToTag[key]:
            s += str(tag) + ","
        s += "\n" 
        result.write(s)
Example #20
def AprioriPass(lineList, freqk, Support, k):
    items_pairs = GetPairs(freqk, k)  # GetPairs builds the candidate pairs from the itemsets handed to AprioriPass
    freqk = defaultdict(int)  # freqk will hold the frequency of the new itemsets
    item_temp = set()  # the itemsets whose frequency passes the support check
    local_temp = defaultdict(int)
    for items in items_pairs:  # for every candidate itemset obtained from GetPairs
        for line in lineList:  # for every line (basket) of the file
            if items.issubset(line):  # if the itemset is contained in the line
                local_temp[items] += 1  # increment its local_temp count by 1
    for item1, count in local_temp.items():  # for each local_temp entry take item1 (key) and count (value)
        if count >= Support:  # if the count reaches the support threshold
            freqk[item1] = count  # store the count (frequency) for item1 in freqk
            item_temp.add(item1)  # and add the itemset to item_temp
    return freqk, item_temp  # freqk: the itemsets with their frequencies; item_temp: the itemsets for the (k+1)-th A_Priori pass
Example #21
 def __init__(self, config, globalDict, parent=None):
     VoltageGlobalAdjustForm.__init__(self)
     VoltageGlobalAdjustBase.__init__(self, parent)
     self.config = config
     self.configname = 'VoltageGlobalAdjust.Settings'
     self.settings = self.config.get(self.configname, Settings())
     self.globalAdjustDict = SequenceDict()
     self.myLabelList = list()
     self.myBoxList = list()
     self.historyCategory = 'VoltageGlobalAdjust'
     self.adjustHistoryName = None
     self.globalDict = globalDict
     self.adjustCache = self.config.get(self.configname+".cache", dict()) 
     self.savedValue = defaultdict( lambda: None )
     self.displayValueObservable = defaultdict( lambda: Observable() )
Example #22
def fillUsingLines(linespath):
    e2name2freq = defaultdict(dict)
    f = open(linespath)
    for line in f:
        parts = line.split('\t')
        for w in parts[4].split():
            if '/m/' in w:
                (mid, tokens, notabletype) = getentparts(w)
                name = ' '.join(tokens)
                if mid not in e2name2freq:
                    e2name2freq[mid] = defaultdict(lambda: 0)
                e2name2freq[mid][name] += 1
    f.close()
    return e2name2freq 
    
Example #23
 def mapLocationToFeed(self):
     if self.feeds is not None and self.movements is not None:
         gridEventMap = defaultdict(list)
         for fid,locrlist in self.movements.items():
             for locr in locrlist:
                 loc = Location(locr.latitude,locr.longitude)
                 if loc in gridEventMap.keys():
                     gridEventMap[loc].append(locr)
                 else:
                     elist = list()
                     elist.append(locr)
                     gridEventMap[loc] = elist
         self.movements.clear()
         self.feeds.clear()
         for gid,elist in gridEventMap.items():
             intv = list()
             for locr in elist:
                 intv.append(locr.loc_timestamp)
             totintv = 0.0
             if len(intv) > 1:
                 for i in range(0,len(intv)-1):
                     totintv += abs((intv[i+1] - intv[i]).total_seconds() / LocationRecord.TimeScale)
                 totintv /= len(intv) - 1
             lid = "U"+str(gid.x_dimension)+"-"+str(gid.y_dimension)
             new_feed = FeedInfo(lid,gid,FeedInfo.default_tag,int(math.ceil(totintv)))
             self.feeds[lid] = new_feed
Example #24
def word_count(words, existing_list=None):
    result = existing_list if existing_list else defaultdict(int)
    for word in words:
        if word == '\'s':
            continue
        result['{0}'.format(word)] += 1
    return result
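A brief usage sketch (the token list is made up); passing the returned defaultdict back in through existing_list lets counts accumulate across calls:

counts = word_count(["the", "cat", "'s", "the", "hat"])
print(dict(counts))   # {'the': 2, 'cat': 1, 'hat': 1} -- "'s" tokens are skipped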
Example #25
def get_high_information_words(lwords, score_fn=BigramAssocMeasures.chi_sq, min_score=5):
    labels = lwords.keys()
    labelled_words = [(l, lwords[l]) for l in labels]
    word_freq_dist = FreqDist()
    label_word_freq_dist = ConditionalFreqDist()

    for label, dwords in labelled_words:
        for words in dwords:
            for word in words:
                word_freq_dist[word] += 1
                label_word_freq_dist[label][word] += 1

    n_words_total = label_word_freq_dist.N()
    high_info_words = set()

    for label in label_word_freq_dist.conditions():
        n_words_label = label_word_freq_dist[label].N()
        word_scores = defaultdict(int)

        for word, word_freq_label in label_word_freq_dist[label].items():
            word_freq = word_freq_dist[word]
            score = score_fn(word_freq_label, (word_freq, n_words_label), n_words_total)
            word_scores[word] = score

        bestwords = [word for word, score in word_scores.items() if score >= min_score]
        high_info_words |= set(bestwords)
    return high_info_words
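A rough usage sketch, assuming NLTK is installed and the imports the function relies on (FreqDist, ConditionalFreqDist, BigramAssocMeasures, defaultdict) are in scope; lwords maps each label to a list of tokenised documents, and min_score=1 is an arbitrary choice:

lwords = {
    'pos': [['good', 'film'], ['great', 'film']],
    'neg': [['bad', 'film'], ['awful', 'film']],
}
informative = get_high_information_words(lwords, min_score=1)
print(informative)
# likely {'good', 'great', 'bad', 'awful'}; 'film' scores 0 because it is spread evenly across labels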
Example #26
def solve(par):
    C, combine, D, opposite, N, S = par
    comb = {}
    for c in combine:
        x = list(c)[:2]
        comb[tuple(x)] = c[2]
        x.reverse()
        comb[tuple(x)] = c[2]
    oppo = defaultdict(list)
    for o in opposite:
        oppo[o[0]].append(o[1])
        oppo[o[1]].append(o[0])
    
    result = []
    for s in list(S):
        if len(result) > 0 and (result[-1], s) in comb:
            c = result[-1]
            result.pop()
            result.append(comb[(c, s)])
            continue
        
        flag = True
        if s in oppo:
            for x in oppo[s]:
                if x in result:
                    result = []
                    flag = False
                    break
        if flag:
            result.append(s)
            
    return '[' + ', '.join(result) + ']'
Example #27
def formDictionary(file):
    """For creating a dictionary of word:phonemes from the CMU phoneme dictionary"""
    phoneme_dictionary = defaultdict()

    for line in open(file):
        if line[0].isalpha(): #This is where we start the dictionary, skipping over all punctuation and developer notes in the beginning.
            split = line.split() #Split is a list of the line item separated by whitespace
            for item in split: #item is each individual segment of the line
                if item[len(item)-1] == ")":#If word is duplicate and has a (1) or similar sequence after it, cut it out. There are multiple pronunciations for this word.
                    item = item[0:len(item)-3]
                    split[0] = item #First item in line is now edited word
                    #split[split.index(item)] = item[0:len(item)-3]
                if not item[len(item)-1].isalpha(): #if the item is not entirely letters, cut the last item off.
                    split[split.index(item)] = item[0:len(item)-1] 
 
            if split[0] in phoneme_dictionary.keys(): #if this word is already in dictionary, make a list out of its existing list
                list = [phoneme_dictionary[split[0]], split[1:] ] #phoneme_dictionary[split[0]] = existing list for duplicate phonemes, next index is current list of phonemes
                phoneme_dictionary[split[0]] = list
            else: 
                phoneme_dictionary[split[0]] = split[1:]#dictionary of {word:[phoneme list]}
            #print split
            
    current_path = open("phoneme_output.txt", "w")
    for entry in phoneme_dictionary:
        current_path.write(entry + str(phoneme_dictionary.get(entry)) + "\n")
    
    current_path = open("phon_pickle.txt", "w")
    pickle.dump(phoneme_dictionary,current_path, pickle.HIGHEST_PROTOCOL)
    current_path.close()
    
    return phoneme_dictionary
Example #28
 def test_default_factory(self):
     import _collections
     f = lambda: 42
     d = _collections.defaultdict(f)
     assert d.default_factory is f
     d.default_factory = lambda: 43
     assert d['5'] == 43
Example #29
def read_graph(file)-> dict:
    DATA = file.read().split()
    myDict = defaultdict(set)
    for e in DATA:
        E =  e.split(';')
        myDict[E[0]].add(E[1])
    return myDict
Example #30
def counting_sort(A, key=lambda x:x):
    B, C = [], defaultdict(list)
    for x in A:
        C[key(x)].append(x)
    for k in range(min(C), max(C)+1):
        B.extend(C[k])
    return B
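A short usage sketch for counting_sort; note that it assumes integer sort keys, since it walks range(min(C), max(C)+1):

data = [("b", 3), ("a", 1), ("c", 3), ("d", 2)]
print(counting_sort(data, key=lambda x: x[1]))
# [('a', 1), ('d', 2), ('b', 3), ('c', 3)] -- stable for equal keys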
Example #31
def gviz_representation(pred_map):
    child_lookup = defaultdict(lambda: None)
    active_nodes = []
    for c in pred_map:
        if pred_map[c]:
            for p in pred_map[c]:
                if p not in child_lookup:
                    child_lookup[p]=[c]
                else:
                    child_lookup[p].append(c)

    rstr = "digraph {\n node [shape=\"circle\"];\n"

    for p in pred_map:
        c_list = child_lookup[p]
        if(c_list):
            c_str = " ".join(str(c.id) for c in c_list)
            rstr += str(p.id) + "->{" + c_str + "}\n"
    rstr+="}\n"
    return rstr
Example #32
 def PriorFociWithSimilarFringe(self,
                                *,
                                current_focus,
                                timestamp,
                                threshold=0.2,
                                decay_factor=0.97):
   """Gets prior items with overlapping fringe."""
   scores = defaultdict(float)
   for fe, wt in current_focus.stored_fringe.items():
     for other_focusable, other_wt in self.fringe_element_to_item_to_wt[
         fe].items():
       if other_focusable is not current_focus:
         scores[other_focusable] += other_wt * wt
   out = []
   for other_focusable in scores.keys():
     age = max(0, timestamp - self.last_focus_time[other_focusable])
     scores[other_focusable] *= (decay_factor**age)
     if scores[other_focusable] >= threshold:
       out.append((other_focusable, scores[other_focusable]))
   return sorted(out, reverse=True, key=lambda x: x[1])
Example #33
    def get_costs(self):  #method to create costs dictionary
        maxCost = 0
        edge = None
        costs = defaultdict(lambda: 1000)  #default dic w lambda at "infinity"
        costForTrain = {}
        for i in self.Edges:  #go through edges
            costs[(i[0], i[1])] = i[2]  #put in the cost
            costs[(i[1], i[0])] = i[2]
            if i[4] == 'B':
                print("i: ", i)
                print("i2: ", i[2])
                costForTrain[(i[0], i[1], i[2])] = i[2]  #put in the cost
        for i in self.Verticies:  #go through vertices
            costs[(i[0], i[0])] = 0  #cost from self to self
        totalSpending = 0
        '''
        while totalSpending <= 100000000 and costForTrain[max(costForTrain.keys())] > 2:
            totalSpending += costForTrain[max(costForTrain.keys())]*1000000
            if totalSpending > 100000000:
                break
            self.profit -= costForTrain[max(costForTrain.keys())]*1000000
            max(costForTrain.keys())[2] = 2
            costs[(max(costForTrain.keys())[0], max(costForTrain.keys())[1])] = 2
            costs[(max(costForTrain.keys())[1], max(costForTrain.keys())[0])] = 2
        '''

        while totalSpending <= 100000000 and costs[max(costs.keys())] > 2:
            totalSpending += costs[max(costs.keys())] * 1000000
            if totalSpending > 100000000:
                break
            self.profit -= costs[max(costs.keys())] * 1000000
            edge = None
            for i in self.Edges:
                if max(costs.keys())[0] == i[0] and max(
                        costs.keys())[1] == i[1]:
                    edge = i
                    break
            edge[2] = 2
            costs[(max(costs.keys())[0], max(costs.keys())[1])] = 2
            costs[(max(costs.keys())[1], max(costs.keys())[0])] = 2
        return costs  #return the dictionary
Example #34
    def checkIfPrerequisite(self, n: int, prerequisites, queries):
        self.graph = defaultdict(list)

        for pre in prerequisites:
            self.graph[pre[1]].append(pre[0])
        print(self.graph)

        self.yz = [[] for i in range(n)]

        def bfs(s):
            visited = [False] * n
            queue = [s]
            visited[s] = True

            while queue:
                cur = queue.pop(0)
                self.yz[s].append(cur)

                for i in self.graph[cur]:
                    if visited[i] == False:
                        queue.append(i)
                        visited[i] = True

        for i in range(n):
            bfs(i)
            print(self.yz)

        #return self.yz  # [[0, 1], [1]]
        print(self.yz)

        ans = []
        for query in queries:
            if len(self.yz[query[1]]) == 1:
                ans.append(False)
            else:
                if query[0] in self.yz[query[1]]:
                    ans.append(True)
                else:
                    ans.append(False)

        return ans
Example #35
def refresh_schema(pk):
    try:
        database = Database.objects.get(pk=pk)
        schema_query = Schema_Query.get(database.db_type)
        schema_data = []
        if database.db_type != 'sqlserver':
            flag, schema_data = run_sql(database, schema_query)
            if not flag:
                raise build_exception_from_java(schema_data)
        else:
            schema_data = sqlserver_schema_data(database)
        schema_dic = defaultdict(OrderedDict)
        type_map = Type_TO_CN.get(database.db_type)
        for x in schema_data:
            owner = x.get('OWNER')
            object_type = type_map.get(x.get('OBJECT_TYPE'))
            object_name = x.get('OBJECT_NAME')
            if not schema_dic.get(owner) or not schema_dic.get(owner).get(object_type):
                schema_dic[owner][object_type] = []
            schema_dic[owner][object_type].append(object_name)

        detail = OrderedDict(sorted(schema_dic.items()))
        created_at = datetime.now().replace(microsecond=0)
        schema = DB_SCHEMA.objects.update_or_create(database=database,
                                                    defaults={
                                                        'detail': detail,
                                                        'created_at': created_at
                                                    })
        Key_Template = f'''{pk}:schema:*'''
        for key in redis.scan_iter(Key_Template):
            redis.delete(key)

        get_table_rows(database)
        return detail
    except ObjectDoesNotExist:
        return {'error_message': ''}
    except Exception as err:
        return {'error_message': str(err)}
Example #36
def get_sim_item(df_, user_col, item_col, use_iif=False):

    df = df_.copy()
    user_item_ = df.groupby(user_col)[item_col].agg(list).reset_index()
    user_item_dict = dict(zip(user_item_[user_col], user_item_[item_col]))

    user_time_ = df.groupby(user_col)['time'].agg(list).reset_index()  # bring in the time factor
    user_time_dict = dict(zip(user_time_[user_col], user_time_['time']))

    sim_item = {}
    item_cnt = defaultdict(int)  # number of times each item was clicked
    for user, items in tqdm(user_item_dict.items()):
        for loc1, item in enumerate(items):
            item_cnt[item] += 1
            sim_item.setdefault(item, {})
            for loc2, relate_item in enumerate(items):
                if item == relate_item:
                    continue
                t1 = user_time_dict[user][loc1]  # pull out the click timestamps
                t2 = user_time_dict[user][loc2]
                sim_item[item].setdefault(relate_item, 0)
                if not use_iif:
                    if loc1 - loc2 > 0:
                        # reverse direction: relate_item was clicked before item
                        sim_item[item][relate_item] += 1 * 0.7 * (0.8 ** (loc1 - loc2 - 1)) * (1 - (t1 - t2) * 10000) / math.log(1 + len(items))
                    else:
                        # forward direction: relate_item was clicked after item
                        sim_item[item][relate_item] += 1 * 1.0 * (0.8 ** (loc2 - loc1 - 1)) * (1 - (t2 - t1) * 10000) / math.log(1 + len(items))
                else:
                    sim_item[item][relate_item] += 1 / math.log(1 + len(items))

    sim_item_corr = sim_item.copy()  # factor in the click counts of items A and B
    for i, related_items in tqdm(sim_item.items()):
        for j, cij in related_items.items():
            sim_item_corr[i][j] = cij / ((item_cnt[i] * item_cnt[j])**0.2)

    return sim_item_corr, user_item_dict
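A small hedged usage sketch with a toy click log; the column names user_id / item_id / time are assumptions, and pandas, tqdm and math must be importable where the function is defined:

import math
import pandas as pd
from collections import defaultdict
from tqdm import tqdm

# timestamps are spaced so that (t1 - t2) * 10000 stays well below 1,
# matching the time weighting used above
df = pd.DataFrame({
    'user_id': [1, 1, 1, 2, 2],
    'item_id': ['a', 'b', 'c', 'a', 'c'],
    'time':    [0.00001, 0.00002, 0.00003, 0.00001, 0.00002],
})
sim, user_items = get_sim_item(df, 'user_id', 'item_id', use_iif=False)
print(user_items[1])        # ['a', 'b', 'c']
print(sim['a'].get('c'))    # co-click similarity of 'c' with respect to 'a'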
Example #37
def parse_mscx(fh, voices, out, nverses=1):

    doc = pulldom.parse(fh)

    level = 0

    tracks = {}
    staff_tracks = defaultdict(set)

    for event, node in doc:
        if event == pulldom.START_ELEMENT:

            if level == 2 and node.tagName == "Part":

                track_name, staff_ids = parse_part(doc)

                if track_name and staff_ids:
                    tracks[track_name] = staff_ids
                    for staff_id in staff_ids:
                        staff_tracks[staff_id].add(track_name)

            elif level == 2 and node.tagName == "Staff" and staff_tracks[
                    node.getAttribute("id")] & voices:

                log.info("Parsing staff with ID [%s]" %
                         node.getAttribute("id"))
                parse_staff(doc, out, nverses=nverses)

            else:
                if log.getEffectiveLevel() <= logging.DEBUG and level < 3:
                    log.debug("%sGot <%s>." % (" " * level, node.tagName))
                level += 1

        elif event == pulldom.END_ELEMENT:

            level -= 1

            if log.getEffectiveLevel() <= logging.DEBUG and level < 3:
                log.debug("%sGot </%s>." % (" " * level, node.tagName))

    log.info("Found tracks [%s]" % (tracks, ))
Example #38
    def fetch_and_create_graph(self):
        '''
        Get data using requests

        For each word in the response, create and add them to `buckets`
            WELSH goes into _ELSH, W_LSH, WE_SH, WEL_H and WELS_ buckets
        Second step, spin a graph amongst the buckets
        :return:
        '''
        buckets = defaultdict(set)

        # # Test data
        if self.test:
            line_iter = t_data

        # fetch data
        else:
            response = requests.get(self.uri)
            line_iter = response.iter_lines()
            next(line_iter)

        # parse words
        for line in line_iter:
            for word in line.split():
                word = word.decode("utf-8")
                self.all_words.add(word)

                for i in range(len(word)):
                    # create or get buckets of neighbors
                    bucket = word[:i] + '_' + word[i + 1:]
                    buckets[bucket].add(word)

        print(f'found {len(self.all_words)} words')

        # second step
        for bucket in buckets:
            for word1 in buckets[bucket]:
                for word2 in buckets[bucket]:
                    if word1 != word2:
                        self.graph[word1].add(word2)
                        self.graph[word2].add(word1)
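The wildcard-bucket trick on its own, as a tiny hedged sketch (the word list is invented): two words that differ in exactly one letter always share a bucket, which is what the second step above exploits:

from collections import defaultdict

words = ['welsh', 'walsh', 'wells']
buckets = defaultdict(set)
for word in words:
    for i in range(len(word)):
        buckets[word[:i] + '_' + word[i + 1:]].add(word)

print(buckets['w_lsh'])   # {'welsh', 'walsh'} -- one-letter neighbours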
Example #39
    def __init__(self, top_url, max_level=5, max_links=50):
        self.top_url = top_url
        self.max_level = max_level
        self.max_links = max_links
        self.observed_links = {}  # page url -> links
        self.visited_links = {}  # page number -> link
        self.product_links = set()
        self.printed_skipped_urls = set()
        self.num_visited_pages = 0
        self.num_walks = 0
        # links that redirect to other domains etc.
        self.blacklisted_links = set()
        self.link_visit_counts = defaultdict(int)  # page number -> link
        self.top_url_tld = get_tld_or_host(top_url)  # TLD for the first URL
        self.base_filename = safe_filename_from_url(
            top_url.replace("http://", "").replace("https://", ""))
        self.outdir = join(OUTDIR, self.base_filename)
        self.png_file_name = join(self.outdir, 'PAGE_NO_URL.png')
        self.page_src_file_name = join(self.outdir, 'PAGE_NO_URL.html')
        self.links_json_file_name = join(self.outdir,
                                         'links_%s.json' % self.base_filename)
        self.visited_links_json_file_name = join(
            self.outdir, 'visited_links_%s.json' % self.base_filename)
        self.product_links_file_name = join(
            self.outdir, 'product_links_%s.txt' % self.base_filename)

        from selenium.webdriver.chrome.options import Options
        from selenium import webdriver
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")
        chrome_prefs = {}
        chrome_options.experimental_options["prefs"] = chrome_prefs
        chrome_prefs["profile.default_content_settings"] = {"images": 2}
        from selenium.webdriver.firefox.options import Options
        self.driver = webdriver.Chrome(options=chrome_options)
        self.driver.set_page_load_timeout(PAGE_LOAD_TIMEOUT)
        self.external_link_err_cnt = 0
        self.timeout_err_cnt = 0
        self.make_site_dir()
Example #40
def dynamic_vm_values(trace, code_start=BADADDR, code_end=BADADDR, silent=False):
    """
    Find the virtual machine context necessary for an automated static analysis.
    code_start = the bytecode start -> often the param for vm_func and usually starts right after vm_func
    code_end = the bytecode end -> the bytecode is usually one big chunk, so if we identify several x86/x64 instructions in a row we have reached the end
    base_addr = startaddr of the jmp table -> most often used offset in the vm_trace
    vm_addr = startaddr of the vm function -> biggest function in .vmp segment,
    :param trace: instruction trace
    :return: vm_ctx -> [code_start, code_end, base_addr, vm_func_addr, vm_funcs]
    """
    base_addr = defaultdict(lambda: 0)
    vm_addr = find_vm_addr(deepcopy(trace))
    trace, vm_seg_start, vm_seg_end = extract_vm_segment(trace)

    code_addrs = []

    # try finding code_start
    if code_start == BADADDR:
        code_start = GetFunctionAttr(vm_addr, FUNCATTR_END)#NextHead(GetFunctionAttr(vm_addr, FUNCATTR_END), vm_seg_end)
        code_start = NextHead(code_start, BADADDR)
        while isCode(code_start):
            code_start = NextHead(code_start, BADADDR)

    for line in trace:
        # construct base addr dict of offsets -> jmp table should be the one most used
        if len(line.disasm) == 2:
            try:
                offset = re.findall(r'.*:off_([0123456789abcdefABCDEF]*)\[.*\]', line.disasm[1])[0]
                base_addr[offset] += 1
            except:
                pass
        # code_start additional search of vm_func params
        if line.addr == vm_addr:
            for l in trace[:trace.index(line)]:
                if l.disasm[0] == 'push':
                    try:
                        arg = re.findall(r'.*_([0123456789ABCDEFabcdef]*)', l.disasm[1])
                        if len(arg) == 1:
                            code_addrs.append(int(arg[0], 16))
                    except Exception, e:
                        print e.message
Example #41
def path_finder_bfs(start: str, end: str, graph: Dict):
    # Setup
    visited = defaultdict(int)
    visited[start] = True
    deck = deque()
    for word in graph[start]:
        deck.append((word, [start]))

    # Solve for path
    while deck:
        word, result = deck.popleft()
        # Found path
        if word == end:
            return result + [end]
        # Still Searching
        for next_word in graph[word]:
            if not visited[next_word]:
                deck.append((next_word, result + [word]))
        visited[word] = True
    # No path found
    return None
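A hedged usage sketch on a tiny hand-built word graph, assuming the defaultdict, deque and typing imports the function needs are in scope:

graph = {
    'cold': {'cord'},
    'cord': {'cold', 'card'},
    'card': {'cord', 'ward'},
    'ward': {'card'},
}
print(path_finder_bfs('cold', 'ward', graph))
# ['cold', 'cord', 'card', 'ward']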
Example #42
    def read_test_files(self, annotation_file):
        '''
        Read files for testing
        '''
        features_test = []
        labels_test = []

        annotation_file = self.basepath + '/annotations/general/' + annotation_file
        annotations = self._read_annotations(annotation_file)

        annotation_dict = defaultdict(list)
        for anno in annotations:
            annotation_dict[anno[3]].append(anno)

        for filename, annos in annotation_dict.items():
            path = self.basepath + '/audio/' + filename
            features, labels = self._read_test_windows(path, annos)
            features_test.extend(features)
            labels_test.extend(labels)

        return features_test, labels_test
Example #43
 def _handle_if_timeouts(self):
     """
     Periodically checks each interface state and issues an if revocation, if
     no keep-alive message was received for IFID_TOUT.
     """
     if_id_last_revoked = defaultdict(int)
     while self.run_flag.is_set():
         start_time = time.time()
         with self.ifid_state_lock:
             for (if_id, if_state) in self.ifid_state.items():
                 cur_epoch = ConnectedHashTree.get_current_epoch()
                 # Check if interface has timed-out.
                 if ((if_state.is_expired() or if_state.is_revoked())
                         and (if_id_last_revoked[if_id] != cur_epoch)):
                     if_id_last_revoked[if_id] = cur_epoch
                     if not if_state.is_revoked():
                         logging.info("IF %d appears to be down.", if_id)
                     self._issue_revocation(if_id)
                     if_state.revoke_if_expired()
         sleep_interval(start_time, self.IF_TIMEOUT_INTERVAL,
                        "Handle IF timeouts")
Example #44
 def _schedule_all_holidays(self, holiday_ids_to_ignore=()):
     resolver = DataResolver(None)
     season_service = services.season_service()
     current_season_length = season_service.season_length_option
     drama_scheduler = services.drama_scheduler_service()
     season_data = defaultdict(list)
     for (season_type,
          season_content) in season_service.get_seasons_for_scheduling():
         season_data[season_type].append(season_content)
     for (season, day, holiday_id) in self._holiday_times[
             current_season_length].holidays_to_schedule_gen():
         if holiday_id in holiday_ids_to_ignore:
             continue
         for season_content in season_data[season]:
             holiday_start_time = season_content.start_time + create_time_span(
                 days=day)
             drama_scheduler.schedule_node(
                 HolidayService.CUSTOM_HOLIDAY_DRAMA_NODE,
                 resolver,
                 specific_time=holiday_start_time,
                 holiday_id=holiday_id)
Example #45
 def predictLocation(self, tokens_in_tweet):
     gc_probabilities_for_tweet = defaultdict(float)
     token_found = False
     for token in tokens_in_tweet:
         gc_probabilities_dict_for_token = self.gc_probabilities_dict_for_tokens.get(
             token)
         if gc_probabilities_dict_for_token == None:
             continue
         else:
             token_found = True
             for gcid in gc_probabilities_dict_for_token.keys():
                 gc_probability_for_token = gc_probabilities_dict_for_token[
                     gcid]
                 gc_probabilities_for_tweet[
                     gcid] += gc_probability_for_token * self.inf_gain_ratios[
                         token]
     if token_found:
         return max(gc_probabilities_for_tweet.iteritems(),
                    key=operator.itemgetter(1))[0]
     else:
         return self.gcid_with_max_prior
Example #46
def recommend(user_count, user_dict, K, topN):
    #     rank = defaultdict(int)
    W = measureSimilarity(user_dict)
    f = open("result.txt", "w")
    user_id = 1
    while user_id <= user_count:
        rank = defaultdict(int)  # the most important line, and easy to put in the wrong place (it must be reset for each user)
        for i, score in user_dict[user_id]:
            for j, wj in sorted(W[i].items(), key=itemgetter(1),
                                reverse=True)[0:K]:
                if j in user_dict[user_id]:
                    continue
                rank[j] += score * wj
        l = sorted(rank.items(), key=itemgetter(1), reverse=True)[0:topN]
        print('user_id ' + str(user_id) + ' : ')
        print(l)
        for item in l:
            f.write(str(user_id) + ' | ' + str(item[0]))
            f.write("\n")
        user_id += 1
Example #47
 def __init__(self, crawlParams):
     #self.visited = []
     self.visited = {}
     self.pagesCount = 0
     self.priorityQueue = crawlParams['priorityQueue']
     self.scorer = crawlParams['scorer']
     self.pageScoreThreshold = crawlParams['pageScoreThreshold']
     self.urlScoreThreshold = crawlParams['urlScoreThreshold']
     self.pagesLimit = crawlParams['num_pages']
     #self.mode = crawlParams['mode']
     self.restricted = crawlParams['restricted']
     self.combineScore = crawlParams['combineScore']
     self.pagesDir = crawlParams['pagesDir']
     #self.hosts_RelNonRelLists={}
     self.bufferLen = crawlParams['bufferLen']
     self.sourcesImp = defaultdict(
         lambda: [1., 1.]
     )  #list contains number of relevant at index 0 and number of non-relevant at index 1
     self.siScoreCombineMethod = crawlParams['siScoreCombineMethod']
     self.topicWeight = 0.6
     self.siWeight = 0.4
Example #48
def AP_interpolated_buckets(sortedRanks, cutoff, total_ranks):
    values = list()
    index_count = defaultdict(lambda: 0)
    for rank in sortedRanks:
        for i in range(1, bucket_count + 1):
            if rank <= i:
                index_count[i] += 1
    for i in range(1, bucket_count + 1):
        counter = index_count[i]
        value = counter / (i * (total_ranks / bucket_count))
        values.append(value)
    values.reverse()
    sum_ = 0
    max_ = -1
    for value in values:
        if value > max_:
            sum_ += value
            max_ = value
        else:
            sum_ += max_
    return (sum_ / len(values), len(values))
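A hedged usage sketch; bucket_count is assumed to be a module-level constant used by the function, and the cutoff argument is accepted but unused above:

bucket_count = 4
ranks = sorted([1, 2, 2, 4])
print(AP_interpolated_buckets(ranks, cutoff=None, total_ranks=len(ranks)))
# (1.25, 4) -- interpolated average over the 4 buckets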
Example #49
    def output(self):
        output_level = defaultdict(str)
        total_pos = 0

        def draw_node(i, level):
            nonlocal total_pos
            if i >= len(self.datas):
                return 0
            ln = draw_node(2 * i + 1, level + 1)
            ol_str = output_level[level]
            for k in range(total_pos - len(ol_str)):
                ol_str += ' '
            ol_str += str(self.datas[i])
            output_level[level] = ol_str
            total_pos += 2
            rn = draw_node(2 * i + 2, level + 1)
            return ln + rn + 1

        draw_node(0, 0)
        for k, v in sorted(output_level.items(), key=lambda e: e[0]):
            print(v)
Example #50
def attribute_value_proportion(instances, attribute, attribute_names):
    '''Returns a dict mapping each value of attribute to its proportion among the list of
    instances. attribute_names is the list we created above, where each element is the name of an attribute.'''
    attribute_value_counts = defaultdict(int)
    instance_value_counts = {}
    instance_proportions = {}
    #find position of attribute in attribute_name
    position_index = attribute_names.index(attribute)

    #count occurrences of values in that position in the index list
    for instance in instances:
        #save the value of the attribute
        instance_value = instance[position_index].strip()
        if instance_value not in instance_value_counts:
            #add to dictionary, but strip beforehand
            instance_value_counts[instance_value] = 0
            instance_proportions[instance_value] = 0
        #increment at dictionary key, make sure you strip beforehand
        instance_value_counts[instance_value] += 1
        instance_proportions[instance_value] = float(instance_value_counts[instance_value]) / len(instances)
    return instance_proportions
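A small hedged usage sketch with invented instances; each instance is a list of attribute values in the same order as attribute_names:

attribute_names = ['outlook', 'play']
instances = [['sunny', 'yes'], ['rain ', 'no'], ['sunny', 'yes'], ['rain', 'no']]
print(attribute_value_proportion(instances, 'outlook', attribute_names))
# {'sunny': 0.5, 'rain': 0.5} -- values are stripped before counting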
Example #51
def oppositeSums(arr):
    # Count pairs (i <= j) with arr[i] + rev(arr[j]) == arr[j] + rev(arr[i]).
    # That condition is equivalent to arr[i] - rev(arr[i]) == arr[j] - rev(arr[j]),
    # so it suffices to group elements by that difference. Start from len(arr)
    # because every element trivially pairs with itself.
    result = len(arr)
    diff = defaultdict(lambda: 0)
    for num in arr:
        rev = int(str(num)[::-1])
        sub = num - rev
        if sub in diff:
            result += diff[sub]
        diff[sub] += 1
    return result
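# Usage sketch: in [1, 20, 2, 11] the elements 1, 2 and 11 share the difference
# num - rev(num) == 0, giving 3 cross pairs on top of the 4 self pairs.
print(oppositeSums([1, 20, 2, 11]))  # 7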
def df_to_edges(graph: pd.DataFrame):
    '''Turn a node table (index = node id, columns 'x', 'y', 'adj', 'weight')
    into an adjacency list mapping each node id to a list of (cost, dst_id)
    tuples.'''
    graph_dict = graph.T.to_dict()
    edges = defaultdict(list)

    for src_id, row_dict in graph_dict.items():
        src_x = row_dict['x']
        src_y = row_dict['y']

        for dst_id in row_dict['adj']:
            dst_x = graph_dict[dst_id]['x']
            dst_y = graph_dict[dst_id]['y']

            # a NaN weight on the destination node means an unweighted edge
            weight = graph_dict[dst_id]['weight']
            if not np.isnan(weight):
                cost = weight * gaversin_distance(src_x, src_y, dst_x, dst_y)
            else:
                cost = gaversin_distance(src_x, src_y, dst_x, dst_y)

            edges[src_id].append((cost, dst_id))

    return edges
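# Hedged usage sketch. `gaversin_distance` is not defined in the snippet above,
# so a plain Euclidean stand-in is used here purely for illustration:
import numpy as np
import pandas as pd
from collections import defaultdict  # required by df_to_edges above

def gaversin_distance(x1, y1, x2, y2):
    # illustrative stand-in for the real (presumably haversine) helper
    return ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5

graph = pd.DataFrame({
    'x': [0.0, 3.0],
    'y': [0.0, 4.0],
    'adj': [[1], [0]],          # node 0 links to node 1 and vice versa
    'weight': [np.nan, 2.0],    # NaN marks edges ending at node 0 as unweighted
})
print(df_to_edges(graph))
# expected: {0: [(10.0, 1)], 1: [(5.0, 0)]}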
Ejemplo n.º 53
0
def solution(N, road, K):
    answer = 1

    cost = defaultdict(lambda: [123456789 for _ in range(N+1)])  # lazily-created "infinite" cost rows
    connection_list = [[] for _ in range(N+1)]
    table = [123456789 for _ in range(N+1)]
    need_to_check = deque()
    answer_set = set()
    for r in road:
        if cost[r[0]][r[1]] == 123456789 : connection_list[r[0]].append(r[1])
        if cost[r[1]][r[0]] == 123456789 : connection_list[r[1]].append(r[0])
        cost[r[0]][r[1]] = min(r[2],cost[r[0]][r[1]])
        cost[r[1]][r[0]] = min(r[2],cost[r[1]][r[0]])

    for i in connection_list[1]:
        need_to_check.append(i)
        table[i] = cost[1][i]
        if table[i] <= K:
            if i not in answer_set:
                answer += 1
                answer_set.add(i)

    answer_set.add(1)

    while len(need_to_check) != 0 :
        check = need_to_check.popleft()
        for i in connection_list[check]:
            if table[i] > table[check] + cost[check][i]: # update
                table[i] = table[check] + cost[check][i]
                need_to_check.append(i)

                if table[i]<=K:
                    if i not in answer_set:
                        answer += 1
                        answer_set.add(i)

    return answer
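# Sanity-check sketch, assuming the task is "count villages reachable from
# village 1 within total road length K" (sample values are illustrative):
from collections import defaultdict, deque  # required by solution() above

road = [[1, 2, 1], [2, 3, 3], [5, 2, 2], [1, 4, 2], [5, 3, 1], [5, 4, 2]]
print(solution(5, road, 3))  # 4 -> villages 1, 2, 4 and 5 lie within distance 3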
Ejemplo n.º 54
0
    def get_jar(self, idc=-1):
        """
            Get the content of all files present in the JAR file stored in the
            field 9.184. The returned dictionnary contains the as follow::
                
                {
                    'file name': 'file content',
                    ...
                }
            
            The content of the files are not parsed, but returned as string value.
            
            :param idc: IDC value.
            :type idc: int
                        
            :return: Content of all files stored in the JAR file.
            :rtype: dict
        """
        idc = self.checkIDC(9, idc)

        data = self.get_field("9.184", idc)
        if data is not None:
            data = base64.decodestring(data)

            buffer = StringIO()
            buffer.write(data)

            ret = defaultdict()

            with zipfile.ZipFile(buffer, "r") as zip:
                for f in zip.namelist():
                    name, _ = os.path.splitext(f)

                    with zip.open(f, "r") as fp:
                        ret[name] = fp.read()

            return dict(ret)

        else:
            return None
    def _compute(self):
        tvs = [x.getResult() for x in self._children]
        tvs = tvs[0:-1]
        from numpy import array

        tvStarts = [array(x.startsAsNumpyArray(), dtype='int64') for x in tvs]
        tvEnds = [array(x.endsAsNumpyArray(), dtype='int64') for x in tvs]

        numTracks = len(tvStarts)
        assert numTracks < 34, 'Maximum supported nr. of tracks for this statistic is 33'

        localBinSize = self._localBinSize
        binSize = self._binSizeStat.getResult()
        bins = np.arange(0, binSize, localBinSize)

        # expected count per local bin, assuming starts are spread uniformly
        s = [len(track) for track in tvStarts]
        E = np.sum(s) / float(len(bins))

        # observed count per local bin, pooled over all tracks
        O = np.zeros((len(bins), 1))
        binPositions = [
            np.floor_divide(t_starts, localBinSize) for t_starts in tvStarts
        ]
        for track in binPositions:
            for binPos in track:
                O[binPos, 0] += 1

        # NOTE: the original had an early `return O, E` here, which left the
        # chi-square computation below unreachable; it is assumed here that the
        # statistic T is the intended result.
        if not E > 0:
            T = 0
        else:
            T = np.sum(np.power((O - E), 2) / E)

        r = defaultdict(int)
        r[0] = T
        return [T]
Ejemplo n.º 56
0
    def validate(self, classifier):
        '''Perform cross-validation with the classifier `classifier`.

        Params:
            classifier: object implementing the following methods (see above)
                estimate(train_samples, train_labels)
                classify(test_samples) --> test_labels

        Returns:
            crossval_overall_result: recognition result of the whole cross-validation
                (over all folds)
            crossval_class_results: list of tuples (category, result) containing the
                per-class recognition results of the cross-validation.
        '''
        crossval_overall_list = []
        crossval_class_dict = defaultdict(list)
        for fold_index in range(self.n_folds):
            train_samples, train_labels, test_samples, test_labels = self.samples_fold(
                fold_index)
            classifier.estimate(train_samples, train_labels)
            estimated_test_labels = classifier.classify(test_samples)
            classifier_eval = ClassificationEvaluator(estimated_test_labels,
                                                      test_labels)
            crossval_overall_list.append(list(classifier_eval.error_rate()))
            crossval_class_list = classifier_eval.category_error_rates()
            for category, err, n_wrong, n_samples in crossval_class_list:
                crossval_class_dict[category].append([err, n_wrong, n_samples])

        crossval_overall_mat = np.array(crossval_overall_list)
        crossval_overall_result = CrossValidation.crossval_results(
            crossval_overall_mat)

        crossval_class_results = []
        for category in sorted(crossval_class_dict.keys()):
            crossval_class_mat = np.array(crossval_class_dict[category])
            crossval_class_result = CrossValidation.crossval_results(
                crossval_class_mat)
            crossval_class_results.append((category, crossval_class_result))

        return crossval_overall_result, crossval_class_results
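# The docstring above only fixes the classifier protocol; a minimal dummy
# classifier satisfying that interface might look like this (purely illustrative):
from collections import Counter

class MajorityClassifier:
    def __init__(self):
        self._label = None

    def estimate(self, train_samples, train_labels):
        # remember the most frequent training label
        self._label = Counter(train_labels).most_common(1)[0][0]

    def classify(self, test_samples):
        # predict that label for every test sample
        return [self._label for _ in test_samples]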
Ejemplo n.º 57
0
def createFasta(input_file, append_file, order):
    '''Concatenate FASTA scaffolds into per-cluster sequences and append them to
    `append_file`, following the scaffold-to-cluster mapping in the `order` file.'''
    fastaInDict = dict()
    orderDictSc = OrderedDict()
    with open(input_file) as FASTAIN, open(append_file,
                                           "a") as APP, open(order) as ORD:
        fastaParse = SeqIO.parse(FASTAIN, "fasta")
        fastaOutDict = defaultdict(list)
        for fastaSeq in fastaParse:
            s = str(fastaSeq.seq)
            idFasta = fastaSeq.id
            fastaInDict[idFasta] = s

        keyMap = {
            "Smic.scaffold9__1420062__1920061":
            "Smic.scaffold9__1420062__2138115",
            "Smic.scaffold236__1__500000": "Smic.scaffold236__1__795886",
            "Smic.scaffold338__1__500000": "Smic.scaffold338__1__646490",
            "Smic.scaffold458__1__500000": "Smic.scaffold458__1__544999"
        }

        for line in ORD:
            line = line.rstrip("\n")
            val = line.split("\t")

            k = val[0]
            if (k in keyMap):
                k = keyMap[k]
                seq = fastaInDict[k][0:500000]
            else:
                seq = fastaInDict[k]

            fastaOutDict[val[1]].append(seq)
            orderDictSc[val[1]] = len(seq)

        for keys in orderDictSc.keys():
            chrom = "cluster" + keys
            print("Writing Chromosome " + str(chrom))
            APP.write(">" + chrom + "\n")
            APP.write("".join(fastaOutDict[keys]))
            APP.write("\n")
Ejemplo n.º 58
0
def assignScaffold(input_file, output_file, pos):
    scDict = defaultdict(list)
    s = ScaffoldList()
    count = 0
    with open(input_file, "r") as INP, open(output_file,
                                            "w") as OUT, open(pos, "r") as PAR:
        for line in INP:
            line = line.rstrip("\n")
            v = line.split("\t")
            a = v[3].split(":")
            b = a[1].split("-")
            s.add(v[0], a[0], b[0], b[1], count, scDict)
            count += 1
        for line in PAR:
            line = line.rstrip("\n")
            v = line.split(" ")
            pos1 = v[4]
            pos2 = v[5]
            side1 = v[2] + "-" + pos1
            side2 = v[2] + "-" + pos2
            print("read", v[0])
            print("side1", side1)
            print("side2", side2)

            side1Sc = s.search(side1, scDict[v[2]])
            side2Sc = s.search(side2, scDict[v[2]])
            OUT.write(line + "\t" + side1Sc.scaffold + "\t" +
                      side2Sc.scaffold + "\n")
Ejemplo n.º 59
0
    def _write_worklist(self, dest_plate_id, worklist):
        '''Write worklist.'''
        worklist_id = dest_plate_id + '_worklist'
        outfile = os.path.join(self.__outdir, worklist_id + '.csv')

        worklist_map = defaultdict(list)

        # group entries by entry[1], keeping them ordered by entry[3]
        for entry in sorted(worklist, key=lambda x: x[3]):
            worklist_map[entry[1]].append(entry)

        with open(outfile, 'a+') as fle:
            writer = csv.writer(fle)

            # round-robin over well positions until every group is drained
            for idx in cycle(range(0, self.__rows * self.__cols)):
                if worklist_map[idx]:
                    entry = worklist_map[idx].pop(0)
                    writer.writerow([
                        plate_utils.get_well(val) if i in (1, 3) else str(val)
                        for i, val in enumerate(entry)
                    ])

                if not sum([len(lst) for lst in worklist_map.values()]):
                    break
 def minNumberOfSemesters(self, n, dependencies, k):
     """
     :type n: int
     :type dependencies: List[List[int]]
     :type k: int
     :rtype: int
     """
     indegree = [0] * (n + 1)
     graph = defaultdict(list)
     for edge in dependencies:
         graph[edge[0]].append(edge[1])
         indegree[edge[1]] += 1
     print(indegree)
     print(graph)
     q = []
     for i in range(1, n + 1):
         if indegree[i] == 0:
             q.append(i)
     count = 0
     ans = 0
     while q:
         u = q.pop()