words, distance = vq(des_list[i][1],voc) for w in words: test_features[i][w] += 1 # Perform Tf-Idf vectorization nbr_occurences = np.sum( (test_features > 0) * 1, axis = 0) idf = np.array(np.log((1.0*len(image_paths)+1) / (1.0*nbr_occurences + 1)), 'float32') # Scale the features test_features = stdSlr.transform(test_features) # Perform the predictions predictions = [] for i in clf.predict(test_features): predictions.append(i) label_file = args["label"] dict=utils.readDict(label_file) count=0 total=len(zip(image_paths, predictions)) for image_path, prediction in zip(image_paths, predictions): print image_path,prediction,dict[utils.getFileName(image_path)] if(prediction==dict[utils.getFileName(image_path)]): count+=1 print "total: "+str(total) print "accuracy: "+str((1.0*count/total*100))+"%"
## Take raw data and extract higher level features using functions in metrics
import utils
import itertools
from metrics import similarityMatrix

# Every (congress, chamber) combination to process.
searches = [[112], ['house', 'senate']]

for congress_num, chamber in itertools.product(*searches):
    congress = str(congress_num)
    # Load the recorded votes for this congress/chamber.
    voteData = utils.readDict('votes-'+congress+chamber)
    similarities = similarityMatrix(voteData)
    # NOTE(review): `people` is not defined anywhere in this chunk — this
    # raises a NameError unless it is bound by code not visible here. Verify.
    utils.writeDict({'sims': similarities, 'people': people}, 'similarities-'+congress+chamber)
import random
import mytools
import os, time
import pandas as pd
import numpy as np
import sys
import utils
from tqdm import tqdm
import re

# Matches runs of accented Latin-1 letters (U+00E0 through U+00FF).
pat1 = re.compile(r'[\u00e0-\u00ff]+')

# Load the dictionary (filter vocabulary).
Dict = utils.readDict(path="utils/filter_vocab.txt")


def load_raw(html, loc):
    """Return the text slice source[start:end] from a saved HTML file.

    html: file name, joined onto ../0_data/zhit-0825/ by string concatenation.
    loc:  "start:end" — colon-separated integer character offsets into the
          UTF-8-decoded file contents.
    """
    zh_file_name = "../0_data/zhit-0825/" + html
    with open(zh_file_name, encoding='UTF-8') as f: source = f.read()
    start, end = loc.split(":")
    start, end = int(start), int(end)
    zh_text = source[start:end]
    return zh_text


def save_to_csv(results, default_path="submission", return_min=False):
    # NOTE(review): this definition is truncated at the end of the visible
    # chunk — only the de-duplication prologue is shown; the remainder
    # (including any use of default_path / return_min) is not visible here.
    # De-duplicate rows via a set round-trip (row order is NOT preserved).
    results = np.array(results)
    results = np.array(list(set([tuple(t) for t in results])))
##Checking out what we've done so far import networkx as nx from utils import readDict, getPeople import numpy as np from dataClasses import * from influenceGraph import getInfluenceList, createGraph import matplotlib.pyplot as plt congress = '112' chamber = 'senate' dat = readDict('similarities-'+congress+chamber) sims = dat['sims'] member_ids = dat['people'] print len(member_ids) influenceList = getInfluenceList(sims,False,40,70) fullPeople = getPeople(member_ids,congress,chamber) influenceGraph = createGraph(influenceList,True,sims,lambda w: w/100.0) partyColors = map(lambda x: x.partyColor(), fullPeople) labels = dict(zip(range(len(fullPeople)), map(lambda x: x.pretty(), fullPeople))) ecolors = map(lambda e: e[2]['weight'], influenceGraph.edges(data=True)) pos=nx.spring_layout(influenceGraph) nx.draw(influenceGraph,pos,node_color=partyColors,node_size=80,edge_color=ecolors,with_labels=False)#,labels=labels) plt.savefig('pos_influence_graph.png') plt.show()