Beispiel #1
0
 def loadRelationship(conf, filePath):
     """Load user-to-user relations from a delimited text file.

     The 'social.setup' entry of ``conf`` is wrapped in a LineConfig and read
     for two options: '-header' (when present, the first line of the file is
     dropped) and '-columns' (whitespace-separated column indices for the
     first user id, the second user id and, optionally, the weight).

     Each data line is split on a space, a comma or a tab. Blank lines are
     skipped. When no weight column is configured the weight defaults to 1.

     Returns a list of [userId1, userId2, weight] entries. Prints a message
     and calls exit(-1) when fewer than two columns are configured or when a
     data line has too few fields.
     """
     socialConfig = LineConfig(conf['social.setup'])
     # Single-argument print(...) behaves the same as a Python 2 print
     # statement and as a Python 3 function call.
     print('loading social data...')
     with open(filePath) as f:
         relations = f.readlines()
     # ignore the headline
     if socialConfig.contains('-header'):
         relations = relations[1:]
     # order of the columns
     order = socialConfig['-columns'].strip().split()
     # Two columns are enough (the weight is optional), so only fewer than
     # two is a configuration error.
     if len(order) < 2:
         print('The social file is not in a correct format.')
         exit(-1)
     # Parse the column indices once instead of once per line.
     columns = [int(col) for col in order[:3]]
     firstCol, secondCol = columns[0], columns[1]
     weightCol = columns[2] if len(columns) > 2 else None
     required = max(columns) + 1

     relation = []
     for lineNo, line in enumerate(relations):
         line = line.strip()
         if not line:
             # e.g. a trailing empty line at the end of the file
             continue
         items = split(' |,|\t', line)
         # Validate the fields of this line (not the column setup), so a
         # short line yields the line-numbered message instead of IndexError.
         if len(items) < required:
             print('The social file is not in a correct format. Error: Line num %d' % lineNo)
             exit(-1)
         if weightCol is None:
             weight = 1
         else:
             weight = float(items[weightCol])
         relation.append([items[firstCol], items[secondCol], weight])
     return relation
Beispiel #2
0
 def loadRelationship(conf, filePath):
     """Load user-to-user relations from a delimited text file.

     The 'social.setup' entry of ``conf`` is wrapped in a LineConfig and read
     for two options: '-header' (when present, the first line of the file is
     dropped) and '-columns' (whitespace-separated column indices for the
     first user id, the second user id and, optionally, the weight).

     Each data line is split on a space, a comma or a tab. Blank lines are
     skipped. When no weight column is configured the weight defaults to 1.

     Returns a list of [userId1, userId2, weight] entries. Prints a message
     and calls exit(-1) when fewer than two columns are configured or when a
     data line has too few fields.
     """
     socialConfig = LineConfig(conf['social.setup'])
     # Single-argument print(...) behaves the same as a Python 2 print
     # statement and as a Python 3 function call.
     print('loading social data...')
     with open(filePath) as f:
         relations = f.readlines()
     # ignore the headline
     if socialConfig.contains('-header'):
         relations = relations[1:]
     # order of the columns
     order = socialConfig['-columns'].strip().split()
     # Two columns are enough (the weight is optional), so only fewer than
     # two is a configuration error.
     if len(order) < 2:
         print('The social file is not in a correct format.')
         exit(-1)
     # Parse the column indices once instead of once per line.
     columns = [int(col) for col in order[:3]]
     firstCol, secondCol = columns[0], columns[1]
     weightCol = columns[2] if len(columns) > 2 else None
     required = max(columns) + 1

     relation = []
     for lineNo, line in enumerate(relations):
         line = line.strip()
         if not line:
             # e.g. a trailing empty line at the end of the file
             continue
         items = split(' |,|\t', line)
         # Validate the fields of this line (not the column setup), so a
         # short line yields the line-numbered message instead of IndexError.
         if len(items) < required:
             print('The social file is not in a correct format. Error: Line num %d' % lineNo)
             exit(-1)
         if weightCol is None:
             weight = 1
         else:
             weight = float(items[weightCol])
         relation.append([items[firstCol], items[secondCol], weight])
     return relation
Beispiel #3
0
 def loadInformation2(conf, filePath):
     """Load movie-to-director records from a delimited text file.

     The 'dire.setup' entry of ``conf`` is wrapped in a LineConfig and read
     for two options: '-header' (when present, the first line of the file is
     dropped) and '-columns' (whitespace-separated column indices for the
     movie id, the director id and, optionally, the weight).

     Each data line is split on a space, a comma or a tab. Blank lines are
     skipped. When no weight column is configured the weight defaults to 1.

     Returns a list of [movieId, direId, weight] entries. Prints a message
     and calls exit(-1) when fewer than two columns are configured or when a
     data line has too few fields.
     """
     metaConfig = LineConfig(conf['dire.setup'])
     # Single-argument print(...) behaves the same as a Python 2 print
     # statement and as a Python 3 function call.
     print('loading director data...')
     with open(filePath) as f:
         informs = f.readlines()
     # ignore the headline
     if metaConfig.contains('-header'):
         informs = informs[1:]
     # order of the columns
     order = metaConfig['-columns'].strip().split()
     # Two columns are enough (the weight is optional), so only fewer than
     # two is a configuration error.
     if len(order) < 2:
         print('The director file is not in a correct format.')
         exit(-1)
     # Parse the column indices once instead of once per line.
     columns = [int(col) for col in order[:3]]
     movieCol, direCol = columns[0], columns[1]
     weightCol = columns[2] if len(columns) > 2 else None
     required = max(columns) + 1

     inform2 = []
     for lineNo, line in enumerate(informs):
         line = line.strip()
         if not line:
             # e.g. a trailing empty line at the end of the file
             continue
         items = split(' |,|\t', line)
         # Validate the fields of this line (not the column setup), so a
         # short line yields the line-numbered message instead of IndexError.
         if len(items) < required:
             # The message names the director file, matching this loader.
             print('The director file is not in a correct format. Error: Line num %d' % lineNo)
             exit(-1)
         if weightCol is None:
             weight = 1
         else:
             weight = float(items[weightCol])
         inform2.append([items[movieCol], items[direCol], weight])
     return inform2
Beispiel #4
0
 def loadDataSet(conf, file, bTest=False, binarized=False, threshold=3.0):
     """Load rating records from a delimited text file into a list.

     The 'ratings.setup' entry of ``conf`` is wrapped in a LineConfig and read
     for three options: '-header' (when present, the first line of the file
     is dropped), '-columns' (whitespace-separated column indices for the
     user id, the item id and, optionally, the rating) and '-delim' (a
     pattern passed to split(); defaults to a space, a comma or a tab).

     Parameters:
         conf      -- mapping that provides the 'ratings.setup' entry.
         file      -- path of the rating file to read.
         bTest     -- only selects which progress message is printed; the
                      records are parsed the same way in both modes.
         binarized -- when true and a rating column is configured, records
                      whose rating is below ``threshold`` are dropped and the
                      remaining ratings are replaced by 1.
         threshold -- cut-off used when ``binarized`` is true.

     Returns a list of [userId, itemId, rating] entries with float ratings.
     When no rating column is configured every rating is 1.0. Blank lines
     are skipped. Prints a message and calls exit(-1) on a bad column setup,
     a line with too few fields, or a rating that is not a number.
     """
     ratingConfig = LineConfig(conf['ratings.setup'])
     # Single-argument print(...) behaves the same as a Python 2 print
     # statement and as a Python 3 function call.
     if not bTest:
         print('loading training data...')
     else:
         print('loading test data...')
     with open(file) as f:
         ratings = f.readlines()
     # ignore the headline
     if ratingConfig.contains('-header'):
         ratings = ratings[1:]
     # order of the columns
     order = ratingConfig['-columns'].strip().split()
     delim = ' |,|\t'
     if ratingConfig.contains('-delim'):
         delim = ratingConfig['-delim']

     # The column setup does not change per line, so validate and parse it
     # once up front.
     if len(order) < 2:
         print('The rating file is not in a correct format.')
         exit(-1)
     try:
         columns = [int(col) for col in order[:3]]
     except ValueError:
         print('Error! The option -columns of the rating.setup must list integer column indices.')
         exit(-1)
     userCol, itemCol = columns[0], columns[1]
     hasRating = len(columns) > 2
     required = max(columns) + 1

     data = []
     for lineNo, line in enumerate(ratings):
         line = line.strip()
         if not line:
             # e.g. a trailing empty line at the end of the file
             continue
         items = split(delim, line)
         # Check the fields of this line so a short line is reported with
         # its number instead of raising IndexError.
         if len(items) < required:
             print('The rating file is not in a correct format. Error: Line num %d' % lineNo)
             exit(-1)
         try:
             # Converting here keeps a non-numeric rating (typically an
             # undeclared header row) inside the handler below.
             rating = float(items[columns[2]]) if hasRating else 1.0  # default value
         except ValueError:
             print('Error! Have you added the option -header to the rating.setup?')
             exit(-1)
         # Without a rating column every record already carries 1.0, so
         # there is nothing to threshold.
         if binarized and hasRating:
             if rating < threshold:
                 continue
             rating = 1.0
         # Exactly one append per kept record, in training and test mode.
         data.append([items[userCol], items[itemCol], rating])
     return data
Beispiel #5
0
    def loadDataSet(conf, file, bTest=False):
        """Load rating records from a delimited text file into nested dicts.

        The 'ratings.setup' entry of ``conf`` is wrapped in a LineConfig and
        read for two options: '-header' (when present, the first line of the
        file is dropped) and '-columns' (whitespace-separated column indices
        for the user id, the item id and the rating).

        Training data (bTest false) needs all three columns. Test data
        (bTest true) may omit the rating column, in which case every rating
        defaults to 1.

        Each data line is split on a space, a comma or a tab. Blank lines
        are skipped.

        Returns a defaultdict(dict) mapping userId -> {itemId: float rating}.
        Prints a message and calls exit(-1) on a bad column setup, a line
        with too few fields, or a rating that is not a number.
        """
        ratingConfig = LineConfig(conf['ratings.setup'])
        # Single-argument print(...) behaves the same as a Python 2 print
        # statement and as a Python 3 function call.
        if not bTest:
            print('loading training data...')
        else:
            print('loading test data...')
        with open(file) as f:
            ratings = f.readlines()
        # ignore the headline
        if ratingConfig.contains('-header'):
            ratings = ratings[1:]
        # order of the columns
        order = ratingConfig['-columns'].strip().split()

        # The column setup does not change per line, so validate and parse
        # it once up front. Test data may go without a rating column.
        minColumns = 2 if bTest else 3
        if len(order) < minColumns:
            print('The rating file is not in a correct format.')
            exit(-1)
        try:
            columns = [int(col) for col in order[:3]]
        except ValueError:
            print('Error! The option -columns of the rating.setup must list integer column indices.')
            exit(-1)
        userCol, itemCol = columns[0], columns[1]
        hasRating = len(columns) > 2
        required = max(columns) + 1

        data = defaultdict(dict)
        for lineNo, line in enumerate(ratings):
            line = line.strip()
            if not line:
                # e.g. a trailing empty line at the end of the file
                continue
            items = split(' |,|\t', line)
            # Check the fields of this line so a short line is reported
            # with its number instead of raising IndexError.
            if len(items) < required:
                print('The rating file is not in a correct format. Error: Line num %d' % lineNo)
                exit(-1)
            try:
                # Converting here keeps a non-numeric rating (typically an
                # undeclared header row) inside the handler below.
                rating = float(items[columns[2]]) if hasRating else 1.0  # default value
            except ValueError:
                print('Error! Have you added the option -header to the rating.setup?')
                exit(-1)
            data[items[userCol]][items[itemCol]] = rating
        return data
Beispiel #6
0
    def loadDataSet(conf, file, bTest=False):
        """Load rating records from a delimited text file into nested dicts.

        The 'ratings.setup' entry of ``conf`` is wrapped in a LineConfig and
        read for two options: '-header' (when present, the first line of the
        file is dropped) and '-columns' (whitespace-separated column indices
        for the user id, the item id and the rating).

        Training data (bTest false) needs all three columns. Test data
        (bTest true) may omit the rating column, in which case every rating
        defaults to 1.

        Each data line is split on a space, a comma or a tab. Blank lines
        are skipped.

        Returns a defaultdict(dict) mapping userId -> {itemId: float rating}.
        Prints a message and calls exit(-1) on a bad column setup, a line
        with too few fields, or a rating that is not a number.
        """
        ratingConfig = LineConfig(conf['ratings.setup'])
        # Single-argument print(...) behaves the same as a Python 2 print
        # statement and as a Python 3 function call.
        if not bTest:
            print('loading training data...')
        else:
            print('loading test data...')
        with open(file) as f:
            ratings = f.readlines()
        # ignore the headline
        if ratingConfig.contains('-header'):
            ratings = ratings[1:]
        # order of the columns
        order = ratingConfig['-columns'].strip().split()

        # The column setup does not change per line, so validate and parse
        # it once up front. Test data may go without a rating column.
        minColumns = 2 if bTest else 3
        if len(order) < minColumns:
            print('The rating file is not in a correct format.')
            exit(-1)
        try:
            columns = [int(col) for col in order[:3]]
        except ValueError:
            print('Error! The option -columns of the rating.setup must list integer column indices.')
            exit(-1)
        userCol, itemCol = columns[0], columns[1]
        hasRating = len(columns) > 2
        required = max(columns) + 1

        data = defaultdict(dict)
        for lineNo, line in enumerate(ratings):
            line = line.strip()
            if not line:
                # e.g. a trailing empty line at the end of the file
                continue
            items = split(' |,|\t', line)
            # Check the fields of this line so a short line is reported
            # with its number instead of raising IndexError.
            if len(items) < required:
                print('The rating file is not in a correct format. Error: Line num %d' % lineNo)
                exit(-1)
            try:
                # Converting here keeps a non-numeric rating (typically an
                # undeclared header row) inside the handler below.
                rating = float(items[columns[2]]) if hasRating else 1.0  # default value
            except ValueError:
                print('Error! Have you added the option -header to the rating.setup?')
                exit(-1)
            data[items[userCol]][items[itemCol]] = rating
        return data