Exemple #1
0
 def parse(self):
     """Build ``self.datadic`` from the files returned by ``self.load()``.

     Each item yielded by ``self.load()`` is split into lines.  A line with
     fewer than three comma-separated fields for which
     ``common.canfind(line, ':')`` is true is read as a movie header: the
     integer before the first ``:`` becomes the current movie id (the id is
     0 until the first header of a file is seen).  Any other line is read as
     ``userid,rank[,...]``; lines whose first two fields are missing or are
     not integers are skipped.  Every accepted record is appended to
     ``self.datadic[userid]`` as a ``(movie, rank)`` tuple.

     Raises:
         AssertionError: if a movie id, user id or rank exceeds its bound.
         ValueError: if a movie header does not start with an integer.
     """
     for index, content in enumerate(self.load()):
         # Single-argument parenthesised print gives the same output on
         # Python 2 and keeps the block parseable on Python 3.
         print('parse %d th file' % index)
         # Progress marker once every 100 files (the previous test,
         # ``if index % 100``, fired on every file *except* those).
         if index % 100 == 0:
             print('.. has parsed %d files' % index)
         movie = 0
         for line in content.split('\n'):
             fields = line.split(',')
             # presumably canfind() tests whether ':' occurs in the line --
             # confirm against the ``common`` module.
             if len(fields) < 3 and common.canfind(line, ':'):
                 # Movie header line: "<movieid>:..."
                 movie = int(line.split(':')[0])
                 # NOTE(review): the bound checked (17779) does not match
                 # the maximum named in the message (17770) -- confirm which
                 # one is intended.
                 assert movie<17779, 'Error: movieid greater than max 17770'
                 continue
             # User record line: "<userid>,<rank>[,...]"
             try:
                 userid = int(fields[0])
                 rank = int(fields[1])
             except (ValueError, IndexError):
                 # Blank or malformed line: skip it, but do not swallow
                 # unrelated exceptions such as KeyboardInterrupt.
                 continue
             assert userid < 2649440, 'Error: userid greater than max'
             assert rank <= 5, 'Error: rank greater than max 5'
             # Create the per-user list on first sight, then add the record.
             self.datadic.setdefault(userid, []).append((movie, rank))
Exemple #2
0
 def parse(self):
     for i,c in enumerate(self.load()):
         lines = c.split('\n')
         for l in lines:
             if len(l.split(',')) < 3: continue
             if common.canfind(l, ':'): continue
             # the line contains userid
             words = l.split(',')
             userid = int(words[0])
             if userid > 2650000:
                 print '.. Error: get userid', userid
                 break
             self.set.add(userid)
         if i % 100 == 0:
             print '.. has parsed %d files' % i
     print '.. begin sort ids'
     print '.. get list'
     self._list = list(self.set)
     print '.. list sort'
     self._list.sort()
     print '.. end parse.'