Example #1
0
def tspim2tensorformat(tsfile,
                       tensorfile,
                       tunit='s',
                       tbins='h',
                       idstartzero=True):
    """Convert a time-stamp property file into tensor (tuple) rows.

    Every non-blank line of ``tsfile`` is parsed as
    ``<u>-<b>:<t1> <t2> ...`` (integer time stamps separated by
    whitespace).  For each time stamp one line ``u,b,<bin>,1`` is written
    to ``tensorfile``.  Both files are opened through ``myreadfile``
    (defined elsewhere in the project).

    Args:
        tsfile: path of the input time-stamp file.
        tensorfile: path of the output tensor file.
        tunit: unit of the input time stamps; only ``'s'`` (seconds)
            enables the named bin sizes below.
        tbins: ``'h'`` (hour) or ``'d'`` (day) when ``tunit == 's'``, or
            an integer bin width that is applied whatever ``tunit`` is.
            Any other combination leaves the time stamps unchanged.
        idstartzero: when exactly ``False`` the ids in the file are
            shifted by -1 before being written.

    Raises:
        ValueError: if a non-blank line does not have exactly one ``:``,
            a key does not have exactly one ``-``, or a field is not an
            integer.

    If a key occurs on several lines, the last line wins.  Output rows
    follow the iteration order of the intermediate dict.
    """
    offset = 0 if idstartzero else -1

    # The bin width depends only on the arguments, so resolve it once.
    divisor = None
    if tunit == 's':
        if tbins == 'h':
            divisor = 3600
        elif tbins == 'd':
            divisor = 3600 * 24
    # An integer bin width applies no matter what the time unit is.
    if isinstance(tbins, int) and not isinstance(tbins, bool):
        divisor = tbins

    propdict = {}
    # The with-statement closes both files; no explicit close() is needed.
    with myreadfile(tsfile, 'rb') as fts, myreadfile(tensorfile, 'wb') as fte:
        for line in fts:
            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing empty lines
            k, v = line.split(':')
            propdict[k] = v
        for k, v in propdict.items():
            u, b = k.strip().split('-')
            if idstartzero is False:
                u = str(int(u) + offset)
                b = str(int(b) + offset)
            # split() without argument tolerates repeated spaces and tabs.
            stamps = [int(t) for t in v.split()]
            if divisor is not None:
                # Floor division keeps the bins integral regardless of the
                # interpreter's default meaning of "/".
                stamps = np.array(stamps, dtype=int) // divisor
            for ts in stamps:
                fte.write(','.join((u, b, str(ts), '1')))
                fte.write('\n')
    return
Example #2
0
 def load_from_edgeproperty(self, profnm, mtype=coo_matrix, dtype=int):
     """Load the graph edge properties (time stamps, ratings, or text vectors).

     Every non-blank line of ``profnm`` is parsed as
     ``<u>-<m>:<p1> <p2> ...``.  The property values of the i-th edge
     read (1-based) are stored in ``self.eprop[i]`` and the value ``i`` is
     stored at position ``(u, m)`` of the edge-index matrix.

     Args:
         profnm: path of the property file, opened through ``myreadfile``
             (defined elsewhere in the project).
         mtype: sparse-matrix constructor called as
             ``mtype((data, (rows, cols)), shape=...)``.
         dtype: type the whitespace-separated property values are cast to.

     Sets:
         self.idstartzero: always ``True`` (recorded for output recovery).
         self.eprop: 1-D object array; element 0 is an empty placeholder,
             element i is the property array of the i-th edge.
         self.edgeidxm: the edge-index matrix built with ``mtype``, plus
             its ``tocsr()``, ``tocsc()``, ``tolil()`` and transposed
             ``tolil()`` forms in ``edgeidxmr``, ``edgeidxmc``,
             ``edgeidxml`` and ``edgeidxmlt``.

     Raises:
         ValueError: if the file contains no edges or a line is malformed.
     """
     self.idstartzero = True  # record for output recovery
     offset = -1 if self.idstartzero is False else 0
     # A sparse matrix gives 0 a special meaning (no entry), so property
     # indices start from 1 and slot 0 only holds an empty placeholder.
     props = [np.array([])]
     x, y, data = [], [], []
     # The with-statement closes the file; no explicit close() is needed.
     with myreadfile(profnm, 'rb') as fin:
         for line in fin:
             line = line.strip()
             if not line:
                 continue  # tolerate blank / trailing empty lines
             um, prop = line.split(':')
             u, m = um.split('-')
             x.append(int(u) + offset)
             y.append(int(m) + offset)
             # data stores the index of the edge's properties in props
             data.append(len(props))
             props.append(np.array(prop.split()).astype(dtype))
     if not x:
         raise ValueError('no edge properties found in %r' % (profnm,))
     self.edgeidxm = mtype((data, (x, y)),
                           shape=(max(x) + 1, max(y) + 1))
     self.edgeidxmr = self.edgeidxm.tocsr()
     self.edgeidxmc = self.edgeidxm.tocsc()
     self.edgeidxml = self.edgeidxm.tolil()
     self.edgeidxmlt = self.edgeidxm.transpose().tolil()
     # The per-edge arrays generally differ in length.  Fill an object
     # array element by element instead of calling np.array() on the
     # ragged list, which recent NumPy versions reject.
     eprop = np.empty(len(props), dtype=object)
     for i, values in enumerate(props):
         eprop[i] = values
     self.eprop = eprop
     return
Example #3
0
def loadtensor2matricization(tensorfile, sumout=(), mtype=coo_matrix,
                             weighted=True, dtype=int):
    """Load a tensor file and matricize it into a sparse matrix.

    Every non-blank line of ``tensorfile`` is a comma-separated tuple.
    The first field is the (integer) row index and the last field is the
    entry value.  The fields in between, minus those whose position is
    listed in ``sumout``, are joined with a single space to form a column
    key; each distinct key is given the next free column index.

    Args:
        tensorfile: path of the tensor file, opened through ``myreadfile``
            (defined elsewhere in the project).
        sumout: positions (field indices within a line) of the ways to
            marginalize, i.e. to leave out of the column key.
        mtype: sparse-matrix constructor called as
            ``mtype((data, (rows, cols)), shape=..., dtype=...)``.
        weighted: when exactly ``False`` every stored value is replaced
            by ``dtype(1)``.
        dtype: type of the matrix entries; also used to parse the last
            field of every line.

    Returns:
        A tuple ``(sm, rindexcols)`` of the sparse matrix and a dict that
        maps each column index back to its column key.

    Raises:
        ValueError: if the file holds no tuples or a field cannot be
            parsed.
    """
    # Build the exclusion set once instead of once per line.
    dropped = set(sumout)
    matcols = {}
    rindexcols = {}
    xs, ys, data = [], [], []
    # The with-statement closes the file; no explicit close() is needed.
    with myreadfile(tensorfile, 'rb') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing empty lines
            elems = line.split(',')
            # Middle fields in ascending position, without the sumout ways.
            kept = [i for i in range(1, len(elems) - 1) if i not in dropped]
            col = ' '.join(elems[i] for i in kept)
            cid = matcols.get(col)
            if cid is None:
                cid = len(matcols)
                matcols[col] = cid
                rindexcols[cid] = col
            xs.append(int(elems[0]))
            ys.append(cid)
            data.append(dtype(elems[-1]))
    nrow, ncol = max(xs) + 1, max(ys) + 1
    sm = mtype((data, (xs, ys)), shape=(nrow, ncol), dtype=dtype)
    if weighted is False:
        sm.data[0:] = dtype(1)

    return sm, rindexcols
Example #4
0
def pim2tensorformat(tsfile, ratefile, tensorfile, tunit='s', tbins='h'):
    """Convert the pim files ``tsfile`` and ``ratefile`` into tensor tuples.

    Every non-blank line of both input files is parsed as
    ``<u>-<b>:<v1> <v2> ...``: integer time stamps in ``tsfile`` and
    numeric ratings in ``ratefile``.  For the i-th time stamp of a key
    one line ``u,b,<time bin>,<rating bin>,1`` is written to
    ``tensorfile``, where the rating bin is 0 for ratings below 2.5,
    1 for ratings up to and including 3.5, and 2 otherwise.

    Args:
        tsfile: path of the time-stamp file (opened through
            ``myreadfile``, defined elsewhere in the project).
        ratefile: path of the rating file (opened through ``myreadfile``).
        tensorfile: path of the output file (opened with ``open``).
        tunit: unit of the input time stamps; only ``'s'`` (seconds)
            enables the named bin sizes below.
        tbins: ``'h'`` (hour) or ``'d'`` (day) when ``tunit == 's'``, or
            an integer bin width that is applied whatever ``tunit`` is.
            Any other combination leaves the time stamps unchanged.

    Raises:
        KeyError: if ``ratefile`` has a key that ``tsfile`` lacks.
        IndexError: if a key has no ratings line, or fewer ratings than
            time stamps.
        ValueError: if a non-blank line or one of its fields is malformed.

    Ratings beyond the number of time stamps of a key are ignored.
    """

    def _rate_bin(rating):
        # Discretize a rating into three levels: low / middle / high.
        if rating < 2.5:
            return 0
        if rating <= 3.5:
            return 1
        return 2

    # The bin width depends only on the arguments, so resolve it once.
    divisor = None
    if tunit == 's':
        if tbins == 'h':
            divisor = 3600
        elif tbins == 'd':
            divisor = 3600 * 24
    # An integer bin width applies no matter what the time unit is.
    if isinstance(tbins, int) and not isinstance(tbins, bool):
        divisor = tbins

    propdict = {}
    # The with-statement closes all three files; no explicit close() needed.
    with myreadfile(tsfile, 'rb') as fts, myreadfile(ratefile, 'rb') as frt,\
            open(tensorfile, 'wb') as fte:
        for line in fts:
            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing empty lines
            k, v = line.split(':')
            propdict[k] = [v]
        for line in frt:
            line = line.strip()
            if not line:
                continue
            k, v = line.split(':')
            propdict[k].append(v)
        for k, vs in propdict.items():
            u, b = k.strip().split('-')
            # split() without argument tolerates repeated spaces and tabs.
            stamps = [int(t) for t in vs[0].split()]
            if divisor is not None:
                # Floor division keeps the bins integral regardless of the
                # interpreter's default meaning of "/".
                stamps = np.array(stamps, dtype=int) // divisor
            levels = [str(_rate_bin(float(r))) for r in vs[1].split()]
            for i, ts in enumerate(stamps):
                fte.write(','.join((u, b, str(ts), levels[i], '1')))
                fte.write('\n')
    return