def transform(self):
        """Write a min-max normalized copy of the input file.

        Obtains the per-column (max, min) lists from ``self.getMaxMin()``,
        re-reads every record through ``self.reader`` and rescales
        ``x[1:]`` to ``(value - min) / (max - min)``; ``x[0]`` (the constant
        term) is left untouched.  The rescaled records are written through a
        ``BinWriter`` stored on ``self.writer``; the output name is
        ``self.filename`` with a trailing ``.bin`` replaced by ``.norm.bin``.

        A column whose max equals its min is written as ``0.0`` instead of
        dividing by zero.  Reader and writer are closed even if a record
        fails to process.
        """
        (high, low) = self.getMaxMin()
        self.reader.open()
        try:
            dim = self.reader.XDim - 1  # drop the constant term
            span = [high[i] - low[i] for i in range(dim)]

            # Remove the '.bin' suffix explicitly: str.rstrip('.bin') strips
            # any trailing '.', 'b', 'i' or 'n' characters, which would turn
            # e.g. 'train.bin' into 'tra'.
            stem = self.filename
            if stem.endswith('.bin'):
                stem = stem[:-len('.bin')]
            self.writer = BinWriter(stem + '.norm.bin')
            self.writer.open(self.reader.LineCount, self.reader.Dim)
            try:
                total = self.reader.LineCount
                for k in range(total):
                    (x, userid, itemid, label) = self.reader.readline()
                    for i in range(dim):
                        if span[i]:
                            # float() keeps Python 2 from truncating when
                            # the features are integers.
                            x[i + 1] = (x[i + 1] - low[i]) / float(span[i])
                        else:
                            # Constant column: no range to scale by.
                            x[i + 1] = 0.0
                    self.writer.writeline(x, userid, itemid, label)

                    if k % 10000 == 0:
                        print('%d/%d' % (k, total))
            finally:
                self.writer.close()
        finally:
            self.reader.close()
    def transform(self):
        """Write a min-max normalized copy of the input file.

        Obtains the per-column (max, min) lists from ``self.getMaxMin()``,
        re-reads every record through ``self.reader`` and rescales
        ``x[1:]`` to ``(value - min) / (max - min)``; ``x[0]`` (the constant
        term) is left untouched.  The rescaled records are written through a
        ``BinWriter`` stored on ``self.writer``; the output name is
        ``self.filename`` with a trailing ``.bin`` replaced by ``.norm.bin``.

        A column whose max equals its min is written as ``0.0`` instead of
        dividing by zero.  Reader and writer are closed even if a record
        fails to process.
        """
        (high, low) = self.getMaxMin()
        self.reader.open()
        try:
            dim = self.reader.XDim - 1  # drop the constant term
            span = [high[i] - low[i] for i in range(dim)]

            # Remove the '.bin' suffix explicitly: str.rstrip('.bin') strips
            # any trailing '.', 'b', 'i' or 'n' characters, which would turn
            # e.g. 'train.bin' into 'tra'.
            stem = self.filename
            if stem.endswith('.bin'):
                stem = stem[:-len('.bin')]
            self.writer = BinWriter(stem + '.norm.bin')
            self.writer.open(self.reader.LineCount, self.reader.Dim)
            try:
                total = self.reader.LineCount
                for k in range(total):
                    (x, userid, itemid, label) = self.reader.readline()
                    for i in range(dim):
                        if span[i]:
                            # float() keeps Python 2 from truncating when
                            # the features are integers.
                            x[i + 1] = (x[i + 1] - low[i]) / float(span[i])
                        else:
                            # Constant column: no range to scale by.
                            x[i + 1] = 0.0
                    self.writer.writeline(x, userid, itemid, label)

                    if k % 10000 == 0:
                        print('%d/%d' % (k, total))
            finally:
                self.writer.close()
        finally:
            self.reader.close()
class Normailzer(object):
    """Min-max normalize the feature columns of a binary sample file.

    Records are read through ``BinReader``; ``transform`` writes the rescaled
    records through ``BinWriter``.  Index 0 of every feature vector is the
    constant term and is never rescaled.
    """

    def __init__(self, filename):
        self.filename = filename
        self.reader = BinReader(filename)

    @staticmethod
    def _output_path(filename):
        """Return ``filename`` with a trailing '.bin' replaced by '.norm.bin'.

        The suffix is removed explicitly because ``str.rstrip('.bin')``
        strips any trailing '.', 'b', 'i' or 'n' characters and would turn
        e.g. 'train.bin' into 'tra'.
        """
        suffix = '.bin'
        if filename.endswith(suffix):
            filename = filename[:-len(suffix)]
        return filename + '.norm.bin'

    @staticmethod
    def _column_bounds(rows, dim):
        """Return ``(max, min)`` lists over ``x[1:dim + 1]`` for each x in rows.

        The bounds are seeded from the first row rather than from 0, so
        all-positive columns report their true minimum and all-negative
        columns their true maximum.  With no rows both lists are zeros.
        """
        high = [0] * dim
        low = [0] * dim
        seeded = False
        for x in rows:
            if not seeded:
                high = [x[i + 1] for i in range(dim)]
                low = list(high)
                seeded = True
                continue
            for i in range(dim):
                value = x[i + 1]
                if value > high[i]:
                    high[i] = value
                if value < low[i]:
                    low[i] = value
        return (high, low)

    @staticmethod
    def _rescale(x, low, span):
        """Rescale ``x[1:]`` in place to ``(value - low) / span``; return x.

        A column with zero span (constant feature) is set to 0.0 instead of
        dividing by zero.  ``float()`` keeps Python 2 from truncating when
        the features are integers.
        """
        for i in range(len(span)):
            if span[i]:
                x[i + 1] = (x[i + 1] - low[i]) / float(span[i])
            else:
                x[i + 1] = 0.0
        return x

    def _iter_records(self):
        """Yield each record of the already-opened reader, printing progress.

        Progress is printed after the consumer has handled record ``k``,
        for every ``k`` that is a multiple of 10000.
        """
        total = self.reader.LineCount
        for k in range(total):
            yield self.reader.readline()
            if k % 10000 == 0:
                print('%d/%d' % (k, total))

    def getMaxMin(self):
        """Scan the whole file and return ``(max, min)`` per feature column.

        Both lists have ``XDim - 1`` entries (the constant term is skipped).
        The reader is opened for the scan and closed afterwards, even when
        reading fails.
        """
        self.reader.open()
        try:
            dim = self.reader.XDim - 1  # drop the constant term
            features = (x for (x, userid, itemid, label)
                        in self._iter_records())
            return self._column_bounds(features, dim)
        finally:
            self.reader.close()

    # Call this method to perform the normalization.
    def transform(self):
        """Write a min-max normalized copy of the input to '<name>.norm.bin'.

        Runs ``getMaxMin`` first, then re-reads every record, rescales its
        feature columns and writes it through ``self.writer``.  Reader and
        writer are closed even if a record fails to process.
        """
        (high, low) = self.getMaxMin()
        self.reader.open()
        try:
            dim = self.reader.XDim - 1  # drop the constant term
            span = [high[i] - low[i] for i in range(dim)]

            self.writer = BinWriter(self._output_path(self.filename))
            self.writer.open(self.reader.LineCount, self.reader.Dim)
            try:
                for (x, userid, itemid, label) in self._iter_records():
                    self._rescale(x, low, span)
                    self.writer.writeline(x, userid, itemid, label)
            finally:
                self.writer.close()
        finally:
            self.reader.close()
class Normailzer(object):
    """Min-max normalize the feature columns of a binary sample file.

    Records are read through ``BinReader``; ``transform`` writes the rescaled
    records through ``BinWriter``.  Index 0 of every feature vector is the
    constant term and is never rescaled.
    """

    def __init__(self, filename):
        self.filename = filename
        self.reader = BinReader(filename)

    @staticmethod
    def _output_path(filename):
        """Return ``filename`` with a trailing '.bin' replaced by '.norm.bin'.

        The suffix is removed explicitly because ``str.rstrip('.bin')``
        strips any trailing '.', 'b', 'i' or 'n' characters and would turn
        e.g. 'train.bin' into 'tra'.
        """
        suffix = '.bin'
        if filename.endswith(suffix):
            filename = filename[:-len(suffix)]
        return filename + '.norm.bin'

    @staticmethod
    def _column_bounds(rows, dim):
        """Return ``(max, min)`` lists over ``x[1:dim + 1]`` for each x in rows.

        The bounds are seeded from the first row rather than from 0, so
        all-positive columns report their true minimum and all-negative
        columns their true maximum.  With no rows both lists are zeros.
        """
        high = [0] * dim
        low = [0] * dim
        seeded = False
        for x in rows:
            if not seeded:
                high = [x[i + 1] for i in range(dim)]
                low = list(high)
                seeded = True
                continue
            for i in range(dim):
                value = x[i + 1]
                if value > high[i]:
                    high[i] = value
                if value < low[i]:
                    low[i] = value
        return (high, low)

    @staticmethod
    def _rescale(x, low, span):
        """Rescale ``x[1:]`` in place to ``(value - low) / span``; return x.

        A column with zero span (constant feature) is set to 0.0 instead of
        dividing by zero.  ``float()`` keeps Python 2 from truncating when
        the features are integers.
        """
        for i in range(len(span)):
            if span[i]:
                x[i + 1] = (x[i + 1] - low[i]) / float(span[i])
            else:
                x[i + 1] = 0.0
        return x

    def _iter_records(self):
        """Yield each record of the already-opened reader, printing progress.

        Progress is printed after the consumer has handled record ``k``,
        for every ``k`` that is a multiple of 10000.
        """
        total = self.reader.LineCount
        for k in range(total):
            yield self.reader.readline()
            if k % 10000 == 0:
                print('%d/%d' % (k, total))

    def getMaxMin(self):
        """Scan the whole file and return ``(max, min)`` per feature column.

        Both lists have ``XDim - 1`` entries (the constant term is skipped).
        The reader is opened for the scan and closed afterwards, even when
        reading fails.
        """
        self.reader.open()
        try:
            dim = self.reader.XDim - 1  # drop the constant term
            features = (x for (x, userid, itemid, label)
                        in self._iter_records())
            return self._column_bounds(features, dim)
        finally:
            self.reader.close()

    # Call this method to perform the normalization.
    def transform(self):
        """Write a min-max normalized copy of the input to '<name>.norm.bin'.

        Runs ``getMaxMin`` first, then re-reads every record, rescales its
        feature columns and writes it through ``self.writer``.  Reader and
        writer are closed even if a record fails to process.
        """
        (high, low) = self.getMaxMin()
        self.reader.open()
        try:
            dim = self.reader.XDim - 1  # drop the constant term
            span = [high[i] - low[i] for i in range(dim)]

            self.writer = BinWriter(self._output_path(self.filename))
            self.writer.open(self.reader.LineCount, self.reader.Dim)
            try:
                for (x, userid, itemid, label) in self._iter_records():
                    self._rescale(x, low, span)
                    self.writer.writeline(x, userid, itemid, label)
            finally:
                self.writer.close()
        finally:
            self.reader.close()
Example #5
0
    sys.exit(1)

# Three modes share one flow: optional preparation, read args.path into an
# FCH_Root, optionally print its info, then optional extraction.
#   --destruct : make sure the target directory exists, read, then destruct
#   --construct: build from the given source, write args.path, read it back
#   (neither)  : just read the file and print info

# When destructing, the output directory is created up front if missing.
if args.destruct and not os.path.exists(args.destruct):
    os.makedirs(args.destruct)

fh = FCH_Root()

# args.destruct takes precedence when both options are given.
if not args.destruct and args.construct:
    fh.construct(args.construct)
    with BinWriter(args.path, overwrite=args.overwrite) as wr:
        fh.toBinary(wr)
    # The read below doubles as a sanity check of what was just written.

with BinReader(args.path) as br:
    fh.fromBinary(br)
if not args.quiet:
    fh.printInfo()

if args.destruct:
    fh.destruct(args.destruct, overwrite=args.overwrite)

# vim:ts=4:sw=4:et
Example #6
0
from BinReader import BinReader
from BinWriter import BinWriter

TOPN = 198000

reader = BinReader(ur'F:\AliRecommendHomeworkData\1212新版\test18.expand.norm.bin')
reader.open()

writer = BinWriter(reader._filename.rstrip('.bin') + '.top.bin')
writer.open(TOPN,reader.Dim)

with open('an.csv') as f:
    items = set(f.readlines())


posi = 0
for i in range(reader.LineCount):
    (x,userid,itemid,label) = reader.readline()

    if i < 800000:
        continue
    if '%d,%d\n' % (userid,itemid) in items:
        label = 1
        posi+=1
    else:
        label = 0
    writer.writeline(x,userid,itemid,label)
   
print ur'正例个数:',posi
writer.close()
reader.close()