def getGt(n=1558):
    """Write the ground-truth mapping matrix, flattened row-major.

    Cell (i, j) is "1" exactly when i == j (user i on one network maps to
    user i on the other), "0" otherwise, giving an n x n identity grid.

    n -- number of users on each side of the mapping (default 1558).
    """
    gts = ["1" if row == col else "0" for row in range(n) for col in range(n)]
    ut.writeList2Line(predictionPath, gtFilename, gts)
def nm(threshold=0.67):
    """Threshold the two name-similarity features into 0/1 match predictions.

    Reads the feature file, predicts "1" for an instance when either of its
    two similarity scores (columns 1 and 2) exceeds `threshold`, else "0",
    and writes the prediction list out.

    threshold -- similarity cut-off in [0, 1] (default 0.67).
    """
    instances = ut.readCommaLine2List(outputPath, featureNmFilename)
    predictions = []
    count = 0  # number of positive predictions (kept for parity; not written out)
    for inst in instances:
        # either feature above the cut-off counts as a match
        is_match = max(float(inst[1]), float(inst[2])) > threshold
        if is_match:
            count += 1
        predictions.append("1" if is_match else "0")
    ut.writeList2Line(predPath, predictionNmFilename, predictions)
def writeMappingCandidates():
    """Extract usable Google<->Twitter mapping pairs into two aligned files.

    Reads the raw mapping (google_id, twitter_url) rows, keeps only rows
    whose Twitter URL yields a non-empty username via getTwitterUsername,
    and writes the Google ids and Twitter names as two parallel lists.
    """
    mappings = ut.readCommaLine2List(inputPath, "twitterMapping")
    google_ids = []
    twitter_names = []
    for pair in mappings:
        username = getTwitterUsername(pair[1])
        # skip rows where no username could be parsed from the URL
        if username != "":
            google_ids.append(pair[0])
            twitter_names.append(username)
    ut.writeList2Line(inputPath, "google/ids_mapping", google_ids)
    ut.writeList2Line(inputPath, "twitter/names_mapping", twitter_names)
def ranking(n=1558, filename="ranking_origin_1558.txt"):
    """Row-wise argmax prediction over an n x n score matrix.

    Reads n*n scores (row-major, one per line), and for each row i emits an
    n-length 0/1 vector with a single "1" at the highest-scoring column.
    The flattened predictions are written to predictionRankFilename and
    also returned.

    n        -- number of users per side (matrix is n x n; default 1558).
    filename -- score file to read from predPath.

    Bug fix: the per-row prediction vector was hard-coded to length 1558
    (`["0"]*1558`) regardless of n; it now uses n, so the function works
    for any matrix size.
    """
    scores = ut.readLine2List(predPath, filename)
    preds = []
    for i in range(n):
        row = scores[i * n:(i + 1) * n]
        # argmax by numeric value (scores are read as strings)
        best = max(enumerate(row), key=lambda kv: float(kv[1]))[0]
        row_pred = ["0"] * n  # was ["0"] * 1558, breaking any n != 1558
        row_pred[best] = "1"
        preds += row_pred
    ut.writeList2Line(predPath, predictionRankFilename, preds)
    return preds
def writeMissingGooglePosts():
    """Record Google ids whose wall-post files are missing or unparsable.

    An id is flagged as an error when (a) it appears in ids_mapping but has
    no file under ../data/google/wall, or (b) its file parses to a JSON dict
    (presumably an API error payload rather than a post list -- TODO confirm
    against the crawler output format).

    NOTE(review): an identical redefinition of this function appears later in
    this file; at import time the later definition is the one in effect.

    Fixes: bare `except: pass` narrowed to ValueError (covers
    json.JSONDecodeError) so real failures are not silently swallowed;
    `type(result) == dict` replaced by isinstance; dead `ids_parsed`
    local removed.
    """
    ids = ut.readLine2List("../data/google/", "ids_mapping")
    ids_errors = []
    for root, _dirs, filenames in os.walk("../data/google/wall"):
        # ids present in the mapping but never crawled to a file
        ids_errors = list(set(ids) - set(filenames))
        for filename in filenames:
            with open(os.path.join(root, filename), "r", errors="ignore") as fi:
                try:
                    result = json.loads(fi.read())
                    if isinstance(result, dict):
                        ids_errors.append(filename)
                except ValueError:
                    # invalid JSON: skip this file, keep best-effort behavior
                    pass
    ut.writeList2Line("../data/stat/", "google_ids_post_errors", ids_errors)
def writeMissingGooglePosts():
    """Write the list of Google ids with missing or broken wall-post data.

    Two failure modes are collected into ../data/stat/google_ids_post_errors:
      1. ids listed in ids_mapping with no corresponding file under
         ../data/google/wall;
      2. files whose JSON content is a dict (which appears to indicate an
         error response rather than a post list -- TODO confirm).

    NOTE(review): this is a duplicate of an identical definition earlier in
    the file; consider deleting one copy.

    Fixes vs. original: isinstance instead of type()==dict, the bare
    `except` narrowed to ValueError (json.JSONDecodeError's base class),
    and the unused `ids_parsed` variable dropped.
    """
    ids = ut.readLine2List("../data/google/", "ids_mapping")
    ids_errors = []
    for root, _folders, filenames in os.walk("../data/google/wall"):
        present = set(filenames)
        ids_errors = [i for i in set(ids) if i not in present]
        for filename in filenames:
            path = os.path.join(root, filename)
            with open(path, "r", errors="ignore") as fi:
                raw = fi.read()
            try:
                if isinstance(json.loads(raw), dict):
                    ids_errors.append(filename)
            except ValueError:
                # unparsable file: best-effort, ignore and continue
                pass
    ut.writeList2Line("../data/stat/", "google_ids_post_errors", ids_errors)
def oneMapping(scores, outputFilename=predictionRankConstraintFilename, n=1558):
    """Build a one-to-one mapping from pairwise scores and write predictions.

    For each side-1 user i, its n candidate scores are sorted descending and
    handed to oneMappingRecur, which resolves conflicts by mutating the
    per-candidate state in `users2`. The final (user1 -> user2) assignment is
    emitted as a flattened n x n 0/1 matrix, one "1" per row.

    scores         -- flat list of n*n pairwise scores, row-major.
    outputFilename -- destination file under predPath.
    n              -- number of users per side (default 1558).
    """
    # per side-2 candidate bookkeeping, mutated by oneMappingRecur
    users2 = {j: {"active": 0, "user": 0, "index": 0, "score": 0} for j in range(n)}

    # for each side-1 user: candidates as (index, score), best score first
    users1 = []
    for i in range(n):
        row = scores[n * i:n * (i + 1)]
        users1.append(sorted(enumerate(row), key=lambda kv: kv[1], reverse=True))

    # resolve one candidate per side-1 user
    for i in range(n):
        oneMappingRecur(users1, users2, i, 0)

    # (user1, user2) pairs ordered by the side-1 user index
    results = sorted([(state["user"], j) for j, state in users2.items()],
                     key=operator.itemgetter(0))

    predictions = []
    for _user1, user2 in results:
        row_pred = ["0"] * n
        row_pred[user2] = "1"
        predictions += row_pred
    ut.writeList2Line(predPath, outputFilename, predictions)
def reviseIdFile():
    """Deduplicate the saw-ids list and merge in recovered ids.

    Keeps the first occurrence of every id in ids_saw, prints the number of
    duplicate entries dropped (same value the original printed), appends the
    ids recovered from 'tmp_ids', and writes the result to ids_saw2.

    NOTE(review): an identical redefinition of this function appears later in
    the file; the later one wins at import time.

    Fixes vs. original: the membership test abused an nx.Graph with the
    deprecated `Graph.node` mapping plus a bare `except` -- replaced by a
    plain set; the builtin `id` is no longer shadowed; the O(n^2)
    delete-by-position loop is gone.
    """
    # read but unused in the original as well -- kept for parity with callers
    ids_visited = ut.readLine2List(snFolder, idsVisitedFileName + "2")
    ids_saw = ut.readLine2List(snFolder, idsSawFileName)
    loss = ut.readLine2List(snFolder, "tmp_ids")

    seen = set()
    deduped = []
    dup_count = 0
    for user_id in ids_saw:
        if user_id in seen:
            dup_count += 1  # repeated occurrence: drop it
        else:
            seen.add(user_id)
            deduped.append(user_id)
    print(dup_count)  # matches the original print(len(dup))

    deduped.extend(loss)
    ut.writeList2Line("../data/google/", "ids_saw2", deduped)
def reviseIdFile():
    """Fix the duplicate-id problem in the saw-ids file.

    Removes all but the first occurrence of each id in ids_saw, prints how
    many duplicates were removed, appends the ids from 'tmp_ids', and writes
    the merged list to ../data/google/ids_saw2.

    NOTE(review): this duplicates an identical definition earlier in the
    file; one copy should be deleted.

    Fixes vs. original: set membership replaces the nx.Graph /
    deprecated-`Graph.node` / bare-`except` trick, the builtin `id` is not
    shadowed, and the reverse delete-by-index loop is eliminated.
    """
    ids_visited = ut.readLine2List(snFolder, idsVisitedFileName + "2")  # unused, kept for parity
    ids_saw = ut.readLine2List(snFolder, idsSawFileName)
    loss = ut.readLine2List(snFolder, "tmp_ids")

    unique_ids = []
    known = set()
    duplicates = 0
    for entry in ids_saw:
        if entry not in known:
            known.add(entry)
            unique_ids.append(entry)
        else:
            duplicates += 1
    # same diagnostic the original emitted (count of dropped duplicates)
    print(duplicates)

    unique_ids += loss
    ut.writeList2Line("../data/google/", "ids_saw2", unique_ids)