def tspim2tensorformat(tsfile, tensorfile, tunit='s', tbins='h',
                       idstartzero=True):
    '''Convert an edge time-stamp file into tensor tuples.

    Every input line has the form ``u-b:t1 t2 ...``. For each time stamp one
    output line ``u,b,<time bin>,1`` is written to ``tensorfile``.

    tsfile: path of the time-stamp file, opened through ``myreadfile``.
    tensorfile: path of the output file, opened through ``myreadfile``.
    tunit: unit of the time stamps; only 's' (seconds) enables the named
        bin sizes below.
    tbins: 'h' (hour) or 'd' (day) when ``tunit`` is 's', or an int used
        directly as the bin width. Any other combination leaves the time
        stamps unbinned.
    idstartzero: when exactly ``False`` both ids are shifted down by one.

    If a key occurs on several lines only the last occurrence is kept.

    NOTE(review): both files are opened in binary mode; under Python 3 that
    yields/expects bytes unless ``myreadfile`` decodes - confirm.
    '''
    # The bin width does not depend on the edge, so work it out once.
    binsize = None
    if tunit == 's':
        if tbins == 'h':
            binsize = 3600
        elif tbins == 'd':
            binsize = 3600 * 24
    # An integer bin width applies no matter what the time unit is.
    if isinstance(tbins, int) and not isinstance(tbins, bool):
        binsize = tbins

    propdict = {}
    with myreadfile(tsfile, 'rb') as fts, \
            myreadfile(tensorfile, 'wb') as fte:
        for line in fts:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines instead of failing to unpack
            k, v = line.split(':')
            propdict[k] = v
        for k, v in propdict.items():
            u, b = k.strip().split('-')
            if idstartzero is False:
                u = str(int(u) - 1)
                b = str(int(b) - 1)
            tss = [int(t) for t in v.split()]
            if binsize is not None:
                # floor division keeps the bins integral on Python 2 and 3
                tss = [t // binsize for t in tss]
            for t in tss:
                fte.write(','.join((u, b, str(t), '1')) + '\n')
def load_from_edgeproperty(self, profnm, mtype=coo_matrix, dtype=int):
    '''Load the graph edge property: time stamps, ratings, or text vector.

    Every line of ``profnm`` has the form ``u-m:p1 p2 ...``. The properties
    of the i-th edge read (counting from 1) are stored in ``self.eprop[i]``
    and ``i`` is stored at position (u, m) of the edge-index matrices.

    profnm: path of the property file, opened through ``myreadfile``.
    mtype: sparse-matrix constructor accepting ``(data, (rows, cols))``.
    dtype: type the property values are cast to.

    Sets ``self.idstartzero``, ``self.eprop``, ``self.edgeidxm``,
    ``self.edgeidxmr`` (CSR), ``self.edgeidxmc`` (CSC), ``self.edgeidxml``
    (LIL) and ``self.edgeidxmlt`` (LIL of the transpose).

    Raises ValueError when the file contains no edges.

    NOTE(review): if the same u-m pair appears on several lines, the
    converted matrices may combine the indices - confirm the input is
    de-duplicated.
    '''
    self.idstartzero = True  # record for output recovery
    offset = -1 if self.idstartzero is False else 0
    # A sparse matrix has a special meaning for 0, so property indices start
    # from 1; slot 0 is an empty placeholder.
    props = [np.array([])]
    x, y, data = [], [], []
    with myreadfile(profnm, 'rb') as fin:
        for line in fin:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines instead of failing to unpack
            um, prop = line.split(':')
            u, m = um.split('-')
            x.append(int(u) + offset)
            y.append(int(m) + offset)
            data.append(len(props))  # data stores the index of the property
            props.append(np.array(prop.strip().split()).astype(dtype))
    if not x:
        raise ValueError('no edge property found in %s' % (profnm,))
    self.edgeidxm = mtype((data, (x, y)), shape=(max(x) + 1, max(y) + 1))
    self.edgeidxmr = self.edgeidxm.tocsr()
    self.edgeidxmc = self.edgeidxm.tocsc()
    self.edgeidxml = self.edgeidxm.tolil()
    self.edgeidxmlt = self.edgeidxm.transpose().tolil()
    # The property vectors have different lengths; recent NumPy refuses to
    # build an array from such a ragged list implicitly, so fill an object
    # array element by element.
    eprop = np.empty(len(props), dtype=object)
    for i, p in enumerate(props):
        eprop[i] = p
    self.eprop = eprop
    return
def loadtensor2matricization(tensorfile, sumout=(), mtype=coo_matrix,
                             weighted=True, dtype=int):
    '''Load a tensor tuple file and flatten it into a sparse matrix.

    Every line is a comma separated tuple. The first field is the integer
    row id and the last field is the weight. The fields in between, minus
    the positions listed in ``sumout``, are joined with a single space to
    form a column key; each distinct key gets the next free column id.

    tensorfile: path of the tuple file, opened through ``myreadfile``.
    sumout: positions (field indices) of the ways to marginalize (sum out).
    mtype: sparse-matrix constructor accepting ``(data, (rows, cols))``.
    weighted: when exactly ``False`` every stored value is set to 1.
    dtype: type of the weights and of the matrix.

    Returns ``(sm, rindexcols)`` where ``rindexcols`` maps a column id back
    to its key string.

    Raises ValueError when the file contains no tuples.

    NOTE(review): binary mode yields bytes under Python 3 unless
    ``myreadfile`` decodes - confirm.
    '''
    dropped = set(sumout)  # loop invariant
    matcols = {}
    rindexcols = {}
    xs, ys, data = [], [], []
    with myreadfile(tensorfile, 'rb') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines instead of failing on int('')
            elems = line.split(',')
            # middle fields that are kept, in increasing position
            colidx = [i for i in range(1, len(elems) - 1)
                      if i not in dropped]
            col = ' '.join(elems[i] for i in colidx)
            if col not in matcols:
                idx = len(matcols)
                matcols[col] = idx
                rindexcols[idx] = col
            xs.append(int(elems[0]))
            ys.append(matcols[col])
            data.append(dtype(elems[-1]))
    if not xs:
        raise ValueError('no tuples found in %s' % (tensorfile,))
    nrow, ncol = max(xs) + 1, max(ys) + 1
    sm = mtype((data, (xs, ys)), shape=(nrow, ncol), dtype=dtype)
    if weighted is False:
        sm.data[0:] = dtype(1)
    return sm, rindexcols
def pim2tensorformat(tsfile, ratefile, tensorfile, tunit='s', tbins='h'):
    '''Convert the pim files tsfile and ratefile into a tensor file of tuples.

    Both inputs hold lines of the form ``u-b:v1 v2 ...``: time stamps in
    ``tsfile`` and ratings in ``ratefile``. For the i-th time stamp of an
    edge one output line ``u,b,<time bin>,<rating bin>,1`` is written, using
    the i-th rating of the same edge.

    tsfile: path of the time-stamp file, opened through ``myreadfile``.
    ratefile: path of the rating file, opened through ``myreadfile``.
    tensorfile: path of the output file.
    tunit: unit of the time stamps; only 's' (seconds) enables the named
        bin sizes below.
    tbins: 'h' (hour) or 'd' (day) when ``tunit`` is 's', or an int used
        directly as the bin width. Any other combination leaves the time
        stamps unbinned.

    Ratings are binned to 0 (below 2.5), 1 (2.5 to 3.5 inclusive) or
    2 (above 3.5).

    Raises KeyError when ``ratefile`` has an edge missing from ``tsfile``
    and IndexError when an edge has fewer ratings than time stamps.

    NOTE(review): the files are opened in binary mode; under Python 3 that
    yields/expects bytes - confirm how ``myreadfile`` behaves before running
    there.
    '''
    def rbins(x):
        # three rating levels: low, middle, high
        return 0 if x < 2.5 else 1 if x <= 3.5 else 2

    # The bin width does not depend on the edge, so work it out once.
    binsize = None
    if tunit == 's':
        if tbins == 'h':
            binsize = 3600
        elif tbins == 'd':
            binsize = 3600 * 24
    # An integer bin width applies no matter what the time unit is.
    if isinstance(tbins, int) and not isinstance(tbins, bool):
        binsize = tbins

    propdict = {}
    with myreadfile(tsfile, 'rb') as fts, myreadfile(ratefile, 'rb') as frt, \
            open(tensorfile, 'wb') as fte:
        for line in fts:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines instead of failing to unpack
            k, v = line.split(':')
            propdict[k] = [v]
        for line in frt:
            line = line.strip()
            if not line:
                continue
            k, v = line.split(':')
            propdict[k].append(v)
        for k, vs in propdict.items():
            u, b = k.strip().split('-')
            tss = [int(t) for t in vs[0].split()]
            if binsize is not None:
                # floor division keeps the bins integral on Python 2 and 3
                tss = [t // binsize for t in tss]
            digrs = [rbins(float(r)) for r in vs[1].split()]
            if len(digrs) < len(tss):
                # fail before writing a partial edge
                raise IndexError(
                    'edge %s has %d time stamps but only %d ratings'
                    % (k, len(tss), len(digrs)))
            # extra ratings beyond the time stamps are ignored
            for t, r in zip(tss, digrs):
                fte.write(','.join((u, b, str(t), str(r), '1')) + '\n')