def loadDataset(filename, usermap, itemmap, parser, shape=None):
    """Load a ratings file into a sparse matrix indexed by known users/items.

    Every line of ``filename`` is handed to ``parser.parse``, which must
    return a ``(userid, itemid, rating)`` triple.  When both ids are present
    in the maps, ``float(rating)`` is stored at row ``usermap[userid]`` and
    column ``itemmap[itemid]``; otherwise the raw triple is set aside.

    Args:
        filename: Path of the text file to read, one record per line.
        usermap: Mapping from user id to row index.
        itemmap: Mapping from item id to column index.
        parser: Object exposing ``parse(line)``.
        shape: Optional ``(rows, cols)`` of the matrix.  Defaults to
            ``(len(usermap), len(itemmap))``.

    Returns:
        A ``(R, cold)`` tuple.  ``R`` is ``coo_tocsr`` applied to the
        assembled COO matrix; ``cold`` lists the ``(userid, itemid, rating)``
        triples skipped because the user or the item was unknown.
    """
    # Count lines in-process instead of interpolating the path into a
    # ``wc -l`` command string: that broke on paths with spaces or
    # metacharacters, and ``wc -l`` ignores a final line that lacks a
    # trailing newline (a one-line file then yielded maxval == 0 and the
    # old ``i % bar.maxval`` raised ZeroDivisionError).
    with open(filename) as f:
        num_lines = sum(1 for _ in f)

    bar = progressbar.ProgressBar(
        maxval=max(num_lines, 1),  # never hand the bar a zero-length range
        widgets=[
            "Loading data: ",
            progressbar.Bar('=', '[', ']'),
            ' ',
            progressbar.Percentage(),
            ' ',
            progressbar.ETA(),
        ],
    ).start()

    rows, cols, vals = [], [], []
    cold = []
    with open(filename) as f:
        for i, line in enumerate(f):
            if i % 1000 == 0:
                # Clamp rather than wrap so the bar never jumps back to 0.
                bar.update(min(i, bar.maxval))
            userid, itemid, rating = parser.parse(line)
            if userid not in usermap or itemid not in itemmap:
                cold.append((userid, itemid, rating))
                continue
            rows.append(usermap[userid])
            cols.append(itemmap[itemid])
            vals.append(float(rating))
    bar.finish()

    if shape is None:
        shape = (len(usermap), len(itemmap))
    R = scipy.sparse.coo_matrix((vals, (rows, cols)), shape=shape)
    R = coo_tocsr(R)
    return R, cold
def import_data(self, filename, parser, shape=None, num_headers=0, debug=False):
    """Read a ratings file and store the resulting sparse matrix in ``self.R``.

    After skipping ``num_headers`` lines, each remaining line is parsed with
    ``parser.parse`` into ``(userid, itemid, rating)``.  For every valid
    record ``self.update_user_item(userid, itemid)`` is called and the
    row/column indices are then read from ``self.users`` and ``self.items``.
    Lines that fail at any of these steps are skipped.

    Args:
        filename: Path of the text file to read.
        parser: Object exposing ``parse(line)``.
        shape: Optional ``(rows, cols)``; either entry may be ``None`` to
            fall back to ``self.nusers`` / ``self.nitems`` respectively.
        num_headers: Number of leading lines to skip before parsing.
        debug: When true, echo every ignored line to standard output.

    Returns:
        ``self.R``, the result of ``coo_tocsr`` on the assembled COO matrix.
    """
    # Count lines in-process instead of interpolating the path into a
    # ``wc -l`` command string: that broke on paths with spaces or
    # metacharacters, and an undercount of 0 made ``i % bar.maxval`` raise
    # ZeroDivisionError.
    with open(filename) as f:
        num_lines = sum(1 for _ in f)

    bar = progressbar.ProgressBar(
        maxval=max(num_lines, 1),  # never hand the bar a zero-length range
        widgets=[
            "Loading data: ",
            progressbar.Bar('=', '[', ']'),
            ' ',
            progressbar.Percentage(),
            ' ',
            progressbar.ETA(),
        ],
    ).start()

    rows, cols, vals = [], [], []
    with open(filename) as f:
        for _ in range(num_headers):
            f.readline()
        for i, line in enumerate(f):
            if i % 1000 == 0:
                # Clamp rather than wrap so the bar never jumps back to 0.
                bar.update(min(i, bar.maxval))
            try:
                userid, itemid, rating = parser.parse(line)
                # Validate the rating before touching any state so a bad
                # value cannot leave a half-recorded entry behind.
                value = float(rating)
                self.update_user_item(userid, itemid)
                uid = self.users[userid]
                iid = self.items[itemid]
            except Exception:
                # Best-effort import: skip malformed lines, but let
                # KeyboardInterrupt / SystemExit propagate.
                if debug:
                    # Same text the old ``print a, b,`` statement produced.
                    sys.stdout.write("Ignoring Input:  " + line)
                continue
            # Append all three together so the lists always stay aligned;
            # previously a failing float() left rows/cols one entry longer
            # than vals and the matrix construction blew up afterwards.
            rows.append(uid)
            cols.append(iid)
            vals.append(value)
    bar.finish()

    if shape is None:
        _shape = (self.nusers, self.nitems)
    else:
        _shape = (
            self.nusers if shape[0] is None else shape[0],
            self.nitems if shape[1] is None else shape[1],
        )
    R = scipy.sparse.coo_matrix((vals, (rows, cols)), shape=_shape)
    self.R = coo_tocsr(R)
    sys.stdout.flush()
    return self.R
def import_data(self, filename, parser, shape=None, num_headers=0, debug=False):
    """Read a ratings file and store the resulting sparse matrix in ``self.R``.

    After skipping ``num_headers`` lines, each remaining line is parsed with
    ``parser.parse`` into ``(userid, itemid, rating)``.  For every valid
    record ``self.update_user_item(userid, itemid)`` is called and the
    row/column indices are then read from ``self.users`` and ``self.items``.
    Lines that fail at any of these steps are skipped.

    Args:
        filename: Path of the text file to read.
        parser: Object exposing ``parse(line)``.
        shape: Optional ``(rows, cols)``; either entry may be ``None`` to
            fall back to ``self.nusers`` / ``self.nitems`` respectively.
        num_headers: Number of leading lines to skip before parsing.
        debug: When true, echo every ignored line to standard output.

    Returns:
        ``self.R``, the result of ``coo_tocsr`` on the assembled COO matrix.
    """
    # Count lines in-process instead of interpolating the path into a
    # ``wc -l`` command string: that broke on paths with spaces or
    # metacharacters, and an undercount of 0 made ``i % bar.maxval`` raise
    # ZeroDivisionError.
    with open(filename) as f:
        num_lines = sum(1 for _ in f)

    bar = progressbar.ProgressBar(
        maxval=max(num_lines, 1),  # never hand the bar a zero-length range
        widgets=[
            "Loading data: ",
            progressbar.Bar("=", "[", "]"),
            " ",
            progressbar.Percentage(),
            " ",
            progressbar.ETA(),
        ],
    ).start()

    rows, cols, vals = [], [], []
    with open(filename) as f:
        for _ in range(num_headers):
            f.readline()
        for i, line in enumerate(f):
            if i % 1000 == 0:
                # Clamp rather than wrap so the bar never jumps back to 0.
                bar.update(min(i, bar.maxval))
            try:
                userid, itemid, rating = parser.parse(line)
                # Validate the rating before touching any state so a bad
                # value cannot leave a half-recorded entry behind.
                value = float(rating)
                self.update_user_item(userid, itemid)
                uid = self.users[userid]
                iid = self.items[itemid]
            except Exception:
                # Best-effort import: skip malformed lines, but let
                # KeyboardInterrupt / SystemExit propagate.
                if debug:
                    # Same text the old ``print a, b,`` statement produced.
                    sys.stdout.write("Ignoring Input:  " + line)
                continue
            # Append all three together so the lists always stay aligned;
            # previously a failing float() left rows/cols one entry longer
            # than vals and the matrix construction blew up afterwards.
            rows.append(uid)
            cols.append(iid)
            vals.append(value)
    bar.finish()

    if shape is None:
        _shape = (self.nusers, self.nitems)
    else:
        _shape = (
            self.nusers if shape[0] is None else shape[0],
            self.nitems if shape[1] is None else shape[1],
        )
    R = scipy.sparse.coo_matrix((vals, (rows, cols)), shape=_shape)
    self.R = coo_tocsr(R)
    sys.stdout.flush()
    return self.R
def loadSideInfo(filename, targetmap, parser, shape=None):
    """Load key/feature pairs into a sparse binary indicator matrix.

    Every line of ``filename`` is handed to ``parser.parse``, which must
    return a ``(keyid, featureid)`` pair.  Pairs whose key is missing from
    ``targetmap`` are dropped.  Feature ids are numbered 0, 1, 2, ... in
    order of first appearance among the kept pairs, and a ``1.0`` is
    recorded at row ``targetmap[keyid]`` and that feature's column.

    Args:
        filename: Path of the text file to read, one pair per line.
        targetmap: Mapping from key id to row index.
        parser: Object exposing ``parse(line)``.
        shape: Optional ``(rows, cols)`` of the matrix.  Defaults to
            ``(len(targetmap), len(feature_map))``.

    Returns:
        A ``(R, feature_map)`` tuple.  ``R`` is ``coo_tocsr`` applied to the
        assembled COO matrix; ``feature_map`` maps each feature id seen to
        its column index.
    """
    # Count lines in-process instead of interpolating the path into a
    # ``wc -l`` command string: that broke on paths with spaces or
    # metacharacters, and ``wc -l`` ignores a final line that lacks a
    # trailing newline (a one-line file then yielded maxval == 0 and the
    # old ``i % bar.maxval`` raised ZeroDivisionError).
    with open(filename) as f:
        num_lines = sum(1 for _ in f)

    bar = progressbar.ProgressBar(
        maxval=max(num_lines, 1),  # never hand the bar a zero-length range
        widgets=[
            "Loading data: ",
            progressbar.Bar("=", "[", "]"),
            " ",
            progressbar.Percentage(),
            " ",
            progressbar.ETA(),
        ],
    ).start()

    rows, cols, vals = [], [], []
    feature_map = {}
    with open(filename) as f:
        for i, line in enumerate(f):
            if i % 1000 == 0:
                # Clamp rather than wrap so the bar never jumps back to 0.
                bar.update(min(i, bar.maxval))
            keyid, featureid = parser.parse(line)
            if keyid not in targetmap:
                continue
            # The next free column is the current map size; setdefault
            # leaves already-known features at their existing index.
            fid = feature_map.setdefault(featureid, len(feature_map))
            rows.append(targetmap[keyid])
            cols.append(fid)
            vals.append(1.0)
    bar.finish()

    if shape is None:
        shape = (len(targetmap), len(feature_map))
    R = scipy.sparse.coo_matrix((vals, (rows, cols)), shape=shape)
    R = coo_tocsr(R)
    return R, feature_map
def loadSideInfo(filename, targetmap, parser, shape=None):
    """Load key/feature pairs into a sparse binary indicator matrix.

    Every line of ``filename`` is handed to ``parser.parse``, which must
    return a ``(keyid, featureid)`` pair.  Pairs whose key is missing from
    ``targetmap`` are dropped.  Feature ids are numbered 0, 1, 2, ... in
    order of first appearance among the kept pairs, and a ``1.0`` is
    recorded at row ``targetmap[keyid]`` and that feature's column.

    Args:
        filename: Path of the text file to read, one pair per line.
        targetmap: Mapping from key id to row index.
        parser: Object exposing ``parse(line)``.
        shape: Optional ``(rows, cols)`` of the matrix.  Defaults to
            ``(len(targetmap), len(feature_map))``.

    Returns:
        A ``(R, feature_map)`` tuple.  ``R`` is ``coo_tocsr`` applied to the
        assembled COO matrix; ``feature_map`` maps each feature id seen to
        its column index.
    """
    # Count lines in-process instead of interpolating the path into a
    # ``wc -l`` command string: that broke on paths with spaces or
    # metacharacters, and ``wc -l`` ignores a final line that lacks a
    # trailing newline (a one-line file then yielded maxval == 0 and the
    # old ``i % bar.maxval`` raised ZeroDivisionError).
    with open(filename) as f:
        num_lines = sum(1 for _ in f)

    bar = progressbar.ProgressBar(
        maxval=max(num_lines, 1),  # never hand the bar a zero-length range
        widgets=[
            "Loading data: ",
            progressbar.Bar('=', '[', ']'),
            ' ',
            progressbar.Percentage(),
            ' ',
            progressbar.ETA(),
        ],
    ).start()

    rows, cols, vals = [], [], []
    feature_map = {}
    with open(filename) as f:
        for i, line in enumerate(f):
            if i % 1000 == 0:
                # Clamp rather than wrap so the bar never jumps back to 0.
                bar.update(min(i, bar.maxval))
            keyid, featureid = parser.parse(line)
            if keyid not in targetmap:
                continue
            # The next free column is the current map size; setdefault
            # leaves already-known features at their existing index.
            fid = feature_map.setdefault(featureid, len(feature_map))
            rows.append(targetmap[keyid])
            cols.append(fid)
            vals.append(1.0)
    bar.finish()

    if shape is None:
        shape = (len(targetmap), len(feature_map))
    R = scipy.sparse.coo_matrix((vals, (rows, cols)), shape=shape)
    R = coo_tocsr(R)
    return R, feature_map
def loadDataset(filename, usermap, itemmap, parser, shape=None):
    """Load a ratings file into a sparse matrix indexed by known users/items.

    Every line of ``filename`` is handed to ``parser.parse``, which must
    return a ``(userid, itemid, rating)`` triple.  When both ids are present
    in the maps, ``float(rating)`` is stored at row ``usermap[userid]`` and
    column ``itemmap[itemid]``; otherwise the raw triple is set aside.

    Args:
        filename: Path of the text file to read, one record per line.
        usermap: Mapping from user id to row index.
        itemmap: Mapping from item id to column index.
        parser: Object exposing ``parse(line)``.
        shape: Optional ``(rows, cols)`` of the matrix.  Defaults to
            ``(len(usermap), len(itemmap))``.

    Returns:
        A ``(R, cold)`` tuple.  ``R`` is ``coo_tocsr`` applied to the
        assembled COO matrix; ``cold`` lists the ``(userid, itemid, rating)``
        triples skipped because the user or the item was unknown.
    """
    # Count lines in-process instead of interpolating the path into a
    # ``wc -l`` command string: that broke on paths with spaces or
    # metacharacters, and ``wc -l`` ignores a final line that lacks a
    # trailing newline (a one-line file then yielded maxval == 0 and the
    # old ``i % bar.maxval`` raised ZeroDivisionError).
    with open(filename) as f:
        num_lines = sum(1 for _ in f)

    bar = progressbar.ProgressBar(
        maxval=max(num_lines, 1),  # never hand the bar a zero-length range
        widgets=[
            "Loading data: ",
            progressbar.Bar("=", "[", "]"),
            " ",
            progressbar.Percentage(),
            " ",
            progressbar.ETA(),
        ],
    ).start()

    rows, cols, vals = [], [], []
    cold = []
    with open(filename) as f:
        for i, line in enumerate(f):
            if i % 1000 == 0:
                # Clamp rather than wrap so the bar never jumps back to 0.
                bar.update(min(i, bar.maxval))
            userid, itemid, rating = parser.parse(line)
            if userid not in usermap or itemid not in itemmap:
                cold.append((userid, itemid, rating))
                continue
            rows.append(usermap[userid])
            cols.append(itemmap[itemid])
            vals.append(float(rating))
    bar.finish()

    if shape is None:
        shape = (len(usermap), len(itemmap))
    R = scipy.sparse.coo_matrix((vals, (rows, cols)), shape=shape)
    R = coo_tocsr(R)
    return R, cold