Example #1

from scipy.cluster.vq import vq
import numpy as np
import utils

# (Continuation of a test script: des_list, voc, test_features, stdSlr,
#  clf, args and image_paths are defined earlier in the original file.)

# Build the bag-of-visual-words histogram for each test image
for i in range(len(image_paths)):
    words, distance = vq(des_list[i][1], voc)
    for w in words:
        test_features[i][w] += 1

# Perform Tf-Idf vectorization
nbr_occurences = np.sum((test_features > 0) * 1, axis=0)
idf = np.array(np.log((1.0 * len(image_paths) + 1) / (1.0 * nbr_occurences + 1)), 'float32')
# (idf is computed here but never applied to test_features in this snippet)

# Scale the features
test_features = stdSlr.transform(test_features)

# Perform the predictions
predictions = []
for i in clf.predict(test_features):
    predictions.append(i)

label_file = args["label"]
label_dict = utils.readDict(label_file)  # maps file name -> ground-truth label

count = 0
total = len(image_paths)
for image_path, prediction in zip(image_paths, predictions):
    print(image_path, prediction, label_dict[utils.getFileName(image_path)])
    if prediction == label_dict[utils.getFileName(image_path)]:
        count += 1

print("total: " + str(total))
print("accuracy: " + str(1.0 * count / total * 100) + "%")

Example #2

## Take raw data and extract higher-level features using functions in metrics
import itertools

import utils
from metrics import similarityMatrix

searches = [[112], ['house', 'senate']]

# Run once for every (congress, chamber) combination
for congress, chamber in itertools.product(*searches):
    congress = str(congress)
    voteData = utils.readDict('votes-' + congress + chamber)
    similarities = similarityMatrix(voteData)
    # Assumption: the member ids are the keys of the vote data;
    # `people` was otherwise undefined in this snippet.
    people = list(voteData.keys())
    utils.writeDict({'sims': similarities, 'people': people},
                    'similarities-' + congress + chamber)
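
For reference, itertools.product(*searches) expands the two sub-lists into every (congress, chamber) pairing, so the loop above runs once per combination:

import itertools

searches = [[112], ['house', 'senate']]
print(list(itertools.product(*searches)))
# [(112, 'house'), (112, 'senate')]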
Example #3

import os
import random
import re
import sys
import time

import numpy as np
import pandas as pd
from tqdm import tqdm

import mytools
import utils

# Matches runs of Latin-1 accented characters (U+00E0 through U+00FF)
pat1 = re.compile(r'[\u00e0-\u00ff]+')

# Read the filter vocabulary dictionary
Dict = utils.readDict(path="utils/filter_vocab.txt")
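
Here readDict is called with a path keyword on a plain-text vocabulary file, unlike the pickle-style calls in the other examples. A minimal line-oriented sketch, assuming one token per line mapped to its index (the real helper's format isn't shown):

def readDict(path):
    # Hypothetical variant: one vocabulary token per line -> {token: index}
    with open(path, encoding='utf-8') as f:
        return {line.strip(): i for i, line in enumerate(f) if line.strip()}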


def load_raw(html, loc):
    # Load one page's raw Chinese text and slice out the span given by
    # a "start:end" character-offset string.
    zh_file_name = "../0_data/zhit-0825/" + html
    with open(zh_file_name, encoding='UTF-8') as f:
        source = f.read()
    start, end = loc.split(":")
    start, end = int(start), int(end)
    zh_text = source[start:end]
    return zh_text


def save_to_csv(results, default_path="submission", return_min=False):

    results = np.array(results)

    # Deduplicate rows: cast each row to a tuple, drop repeats via a set,
    # then rebuild the array (the rest of this function is not shown)
    results = np.array(list(set([tuple(t) for t in results])))
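
A quick usage sketch for load_raw above; the file name and offsets are hypothetical, the point is only the "start:end" offset format:

# Hypothetical page name and character offsets
zh_text = load_raw("page_001.html", "120:480")
print(zh_text)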
Example #4

## Checking out what we've done so far
import networkx as nx
from utils import readDict, getPeople
import numpy as np
from dataClasses import *
from influenceGraph import getInfluenceList, createGraph
import matplotlib.pyplot as plt

congress = '112'
chamber = 'senate'

dat = readDict('similarities-' + congress + chamber)
sims = dat['sims']
member_ids = dat['people']

print(len(member_ids))

influenceList = getInfluenceList(sims, False, 40, 70)
fullPeople = getPeople(member_ids, congress, chamber)
influenceGraph = createGraph(influenceList, True, sims, lambda w: w / 100.0)
partyColors = [p.partyColor() for p in fullPeople]
labels = dict(zip(range(len(fullPeople)), [p.pretty() for p in fullPeople]))
ecolors = [e[2]['weight'] for e in influenceGraph.edges(data=True)]
pos = nx.spring_layout(influenceGraph)
nx.draw(influenceGraph, pos, node_color=partyColors, node_size=80,
        edge_color=ecolors, with_labels=False)  # pass labels=labels to show names
plt.savefig('pos_influence_graph.png')
plt.show()
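
Because ecolors is a list of numbers, networkx maps the edge colors through a Matplotlib colormap. To make that mapping explicit, the draw call could also pass edge_cmap and the value bounds (standard networkx drawing keywords):

nx.draw(influenceGraph, pos, node_color=partyColors, node_size=80,
        edge_color=ecolors, edge_cmap=plt.cm.viridis,
        edge_vmin=min(ecolors), edge_vmax=max(ecolors),
        with_labels=False)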