# Script: for every field known to the dataset, find which of that field's
# listed experts also appear among the authors read from the dataset, and
# write the matches (one per line, sorted) to "matched_experts.txt" in the
# field's data directory.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

dataset = ArnetMinerDataset()
#dataset.dataFilename = dataset.dataDir + "DBLP-citation-100000.txt"

# Only authorList is used below; the other two results are kept as
# module-level names in case anything importing this script relies on them.
authorList, documentList, citationList = dataset.readAuthorsAndDocuments()

# authorList is flattened one level (it is iterated as a collection of
# per-entry author collections) so membership tests below are set lookups.
authorSet = set(itertools.chain.from_iterable(authorList))
print("Found all authors")

expertMatchesDict = {}

for field in dataset.fields:
    # Deduplicate through a set, then sort so the output file is stable.
    expertMatchesDict[field] = sorted(
        {expert for expert in dataset.expertsDict[field] if expert in authorSet}
    )

    # Now write out the matched experts
    outputFilename = dataset.getDataFieldDir(field) + "matched_experts.txt"
    lines = [x + "\n" for x in expertMatchesDict[field]]

    # The context manager guarantees the handle is closed even if the write
    # fails part-way through.
    with open(outputFilename, "w") as outputFile:
        outputFile.writelines(lines)

    logging.debug("Wrote experts to %s", outputFilename)