def write_vector(vector, path):
    """Save the first field of every row after the header, one per line.

    The first element of ``vector`` (the column names) is not included.
    For each remaining row, ``str(row[0])`` is emitted; the pieces are
    joined with newlines and handed to ``save_data(path, text)``.
    """
    first_fields = (str(row[0]) for row in vector[1:])
    save_data(path, '\n'.join(first_fields))
data2 = transform_raw_charset(all_cs, data2)
all_cs = None  # drop the reference; it is not used again below

# delta groups ids by the transition they made between the two snapshots:
# {cs_from: {cs_to: [ids], ...}, ...}
delta = {}

# Guard each max(): an empty mapping would otherwise raise ValueError.
last_id = max(max(data1) if data1 else 0, max(data2) if data2 else 0)

for i in range(1, last_id + 1):
    # pop() reads and removes the entry in one step, so both mappings are
    # consumed as the loop advances. 0 stands for "not present".
    cs_from = data1.pop(i, 0)  # 0 -> creation
    cs_to = data2.pop(i, 0)  # 0 -> deletion
    delta.setdefault(cs_from, {}).setdefault(cs_to, []).append(i)

# One "<from>\t<to>\t<count>\n" row per observed transition, joined once
# instead of growing a string with += inside the nested loop.
rows = [
    '%d\t%d\t%d\n' % (f, t, len(ids))
    for f, targets in delta.items()
    for t, ids in targets.items()
]
save_data(out, ''.join(rows))
def write_vector(vector, path):
    """Save every element of ``vector`` as text, one element per line.

    Each element is converted with ``str``; the results are joined with
    newlines and passed to ``save_data(path, text)``.
    """
    text = '\n'.join(map(str, vector))
    save_data(path, text)
if __name__ == '__main__':
    """create dataset.tsv. Matrix with charsets vs dates"""
    # The file on disk maps charset -> number; flip it to number -> charset
    # so that labels can be listed in numeric order.
    number_by_charset = load_all_cs_file(all_charset_path())
    charset_by_number = {
        number_by_charset[key]: key for key in number_by_charset
    }
    total = len(charset_by_number)

    # First column of the matrix: the 'charset' header, then one label per
    # charset number 0..total-1.
    labels = ['charset']
    labels.extend(str(charset_by_number[idx]) for idx in range(total))

    # Every date contributes one more column: the date, then one value per
    # charset (0 where the date's data has no entry for that charset).
    columns = [labels]
    for date in DATES:
        started = time()
        entries = load_transformed_charset(get_transformed_charset_path(date))
        values = [0] * total
        for entry in entries:
            values[entry[0]] = entry[1]
        cells = [str(value) for value in values]
        columns.append([str(date)] + cells)
        finished = time()
        print(finished - started, date)

    # Transpose the columns into rows and serialise them as TSV.
    table = '\n'.join('\t'.join(row) for row in zip(*columns))
    save_data(tsv_path(), table)
def write_names(vector, path):
    """Save the entries of ``vector`` after the first, one per line.

    The first element (the column names) is not included. The remaining
    entries are joined with newlines as-is and passed to
    ``save_data(path, text)``.
    """
    names = vector[1:]
    text = '\n'.join(names)
    save_data(path, text)