# NOTE(review): this fragment has lost its line breaks and indentation, and it
# starts inside a loop over `line` whose header is not visible here. The
# nesting of some statements (notably `itow[i] = w` and `prev = i`, which may
# belong to the `else:` branch or to the enclosing `if`) cannot be determined
# from this text alone -- restore it from the original file before editing.
#
# Part 1 (up to `fwl.close()`): each line is split on whitespace.
#   * 3 fields -> `i = int(field 0)`, `l = int(field 1)`, `w = field 2`.
#     When `i == prev`, `l` is appended to `wtol[w]`; otherwise `wtol[w]` is
#     replaced by `[l]`. `itow[i]` is set to `w` and `prev` to `i`.
#   * 2 fields -> `itow[int(field 0)] = field 1`.
# Part 2: `wtolu` is loaded with `sm.readwl` from the top-1000 word-list file.
# Part 3: the file named by sys.argv[1] is read line by line. A line of length
#   >= 2 appends `itow[int(first field)]` to the current group `tmp`; a shorter
#   line maps every term of the group to the group list in `bk` (all terms
#   share the same list object) and starts a new group.
#   NOTE(review): a final group that is not followed by a short line does not
#   appear to be stored in `bk` -- confirm this is intended.
# Part 4: `otype` is parsed as an int from sys.argv[3].
tmp = line.split() if len(tmp) == 3: i = int(tmp[0]) w = tmp[2] l = int(tmp[1]) if i == prev: wtol[w].append(l) else: wtol[w] = [l] itow[i] = w prev = i elif len(tmp) == 2: itow[int(tmp[0])] = tmp[1] fwl.close() wtolu = sm.readwl("/home/ec2-user/git/statresult/wordslist_dsw_top1000.txt") tmp = [] bk = {} fbk = open(sys.argv[1],"r") for line in fbk: if len(line) < 2: for term in tmp: bk[term] = tmp tmp =[] else: tmp.append(itow[int(line.split()[0])]) fbk.close() otype = int(sys.argv[3])
# Output mode flag: 1 when the command-line argument sys.argv[6] is the
# literal 'stdout', otherwise 0.
otype = 1 if sys.argv[6] == 'stdout' else 0


def vecof(lines, a, wtol, kk):
    """Return the sum of the columns of ``a`` selected by ``lines``.

    Args:
        lines: iterable of strings; newline characters at either end of each
            entry are stripped before the lookup.
        a: array supporting ``a[:, index]`` indexing.
        wtol: mapping from the stripped line text to the index used to select
            from ``a`` (presumably a word -> column table; verify against
            ``sm.readwl``).
        kk: length of the zero vector the accumulation starts from.

    Returns:
        The accumulated vector; ``np.zeros(kk)`` when ``lines`` is empty.

    Raises:
        KeyError: if a stripped line is missing from ``wtol`` (assuming
            ``wtol`` is a plain dict -- TODO confirm).
    """
    vec = np.zeros(kk)
    for line in lines:
        key = line.strip('\n')
        vec = vec + a[:, wtol[key]]
    return vec


stype = int(sys.argv[5])
zipf = float(sys.argv[4])
wtola = sm.readwl("/home/ec2-user/git/statresult/wordslist_dsw.txt")
#zipf = crand.zipf_init(len(wtola))

# stype 3 works on the top-10000 word list; every other stype reuses the list
# loaded from wordslist_dsw.txt.
if stype == 3:
    wtolu = sm.readwl(
        "/home/ec2-user/git/statresult/wordslist_top10000_dsw.txt")
else:
    wtolu = wtola

a = np.load('/home/ec2-user/data/classinfo/vt.npy')  # LSA result
b = a
ukk = a.shape[0]
s = None
akk = ukk
if stype == 3:
    # Earlier model files, kept for reference:
    # b = np.load('/home/ec2-user/git/statresult/lda-30-2000-phi.npy')
    # s = np.load('/home/ec2-user/git/statresult/lda-30-2000-pz.npy')
    b = np.load('/home/ec2-user/git/statresult/lda-32-2000-top10000-phi.npy')
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Count topic-file lines and how many of them occur in the top-1000 word list.

Walks the topics directory, reads ``<name>.txt`` for every file whose name
consists only of digits, and prints two numbers separated by a space: the
total number of lines read and the number of those lines found in the word
list returned by ``sm.readwl``.
"""
import os
import sys
import math
from os import path

# Make the parent of this script's directory importable so that the
# project-local ``pythonlib`` package can be found.
sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
from pythonlib import semantic as sm


def _count_vocabulary_hits(topics_dir, vocabulary):
    """Return ``(total_lines, lines_in_vocabulary)`` for the topic files.

    Args:
        topics_dir: directory tree to walk.
        vocabulary: container supporting ``in``; each line, with newline
            characters stripped from both ends, is tested against it.

    Returns:
        A tuple of the number of lines read and the number of those lines
        that are members of ``vocabulary``.
    """
    total = 0
    hits = 0
    for root, _dirs, files in os.walk(topics_dir):
        for name in files:
            if not name.isdigit():
                continue
            # NOTE(review): the file opened is the sibling "<name>.txt", not
            # the digit-named file itself -- confirm this is intentional.
            with open(path.join(root, name) + '.txt', 'r') as fin:
                for line in fin:
                    total += 1
                    if line.strip('\n') in vocabulary:
                        hits += 1
    return total, hits


wtol = sm.readwl("/home/ec2-user/git/statresult/wordslist_dsw_top1000.txt")
num, tin = _count_vocabulary_hits('/home/ec2-user/data/topics/', wtol)
# Same text as the Python 2 statement ``print num,tin``; also valid on Python 3.
print("%d %d" % (num, tin))