def main():
    """Fetch 'data mining' publications, dump extracted rows to pubs_dump.json,
    then load the cached terms pickle.

    NOTE(review): relies on the module-level DataCenterClient and on the
    data-center service at tcp://10.1.1.211:32011 being reachable.
    """
    import time
    import datetime
    import json
    import pickle

    client = DataCenterClient("tcp://10.1.1.211:32011")
    result = client.searchPublications("data mining")
    # Column names the dumped rows are meant to line up with (kept for reference).
    data_fields = [
        "id", "mid", "uid", "parent", "type", "t", "user_created_at",
        "followers_count", "statuses_count", "friends_count", "username",
        "text", "words", "verified", "emotion",
    ]
    items = []
    for p in result.publications:
        # First author id, or the sentinel "0" when there are no authors.
        au = p.author_ids[0] if p.author_ids else "0"
        # Publications only carry a year; pin to Jan 1 01:01 for a timestamp.
        dt = datetime.datetime(p.year, 1, 1, 1, 1)
        t = int(time.mktime(dt.timetuple()))
        # Renamed loop variable: the original shadowed the outer `x`.
        children = [str(cited) for cited in p.cited_by_pubs]
        y = [str(p.id), str(p.id), str(au), children, 0, t, t,
             p.n_citations, p.n_citations, p.n_citations,
             p.authors, p.title, "hello,world"]
        items.append(y)
    # Context manager guarantees the dump file is closed even on error.
    with open("pubs_dump.json", "w") as dump:
        dump.write(json.dumps(items))
    # BUG FIX: pickle files must be opened in binary mode ("rb") on Python 3;
    # the original text-mode open raises UnicodeDecodeError. Also close the
    # handle deterministically via `with`.
    with open("..\\static\\pickle\\terms_dump_all.pickle", "rb") as fh:
        terms = pickle.load(fh)
def main():
    """Search the data center for 'data mining' publications, serialize the
    extracted rows to pubs_dump.json, and read back the cached terms pickle.
    """
    import time
    import datetime

    service = DataCenterClient("tcp://10.1.1.211:32011")
    response = service.searchPublications("data mining")
    data_fields = [
        "id", "mid", "uid", "parent", "type", "t", "user_created_at",
        "followers_count", "statuses_count", "friends_count", "username",
        "text", "words", "verified", "emotion",
    ]
    items = []
    for pub in response.publications:
        # First author when present, otherwise the "0" placeholder.
        if len(pub.author_ids) > 0:
            first_author = pub.author_ids[0]
        else:
            first_author = "0"
        # Year-only records get a fixed Jan 1 01:01 timestamp.
        stamp = int(time.mktime(
            datetime.datetime(pub.year, 1, 1, 1, 1).timetuple()))
        children = [str(cited) for cited in pub.cited_by_pubs]
        parents = []
        row = [str(pub.id), str(pub.id), str(first_author), children, 0,
               stamp, stamp, pub.n_citations, pub.n_citations,
               pub.n_citations, pub.authors, pub.title, "hello,world"]
        items.append(row)
    import json
    dump = open("pubs_dump.json", "w")
    dump.write(json.dumps(items))
    dump.close()
    import pickle
    terms = pickle.load(open("..\\static\\pickle\\terms_dump_all.pickle"))
def getCitationNetwork():
    """Fetch 'deep learning' publications plus their one-hop citation
    neighborhood and dump the extracted rows to pubs_dump.json.

    Also tallies key-term frequencies overall (key_terms) and per
    publication year (year_terms); terms are lower-cased so counts are
    case-insensitive.

    NOTE(review): relies on the module-level DataCenterClient and
    extractPublication, and on the data-center service being reachable.
    """
    import json
    from collections import defaultdict

    client = DataCenterClient("tcp://10.1.1.211:32011")
    result = client.searchPublications("deep learning")
    # Column names the dumped rows are meant to line up with (kept for reference).
    data_fields = [
        "id", "mid", "uid", "parent", "type", "t", "user_created_at",
        "followers_count", "statuses_count", "friends_count", "username",
        "text", "words", "verified", "emotion",
    ]
    items = []
    cite_pubs = []
    key_terms = defaultdict(int)
    year_terms = defaultdict(lambda: defaultdict(int))

    # First pass: seed publications from the search result.
    for p in result.publications:
        if p.year <= 1970:  # skip records with missing/placeholder years
            continue
        item, children, parents, kt = extractPublication(p)
        if children:  # only keep publications that are cited by something
            items.append(item)
            cite_pubs.extend(children)
            cite_pubs.extend(parents)
            for k in kt:
                key_terms[k.lower()] += 1
                year_terms[p.year][k.lower()] += 1

    # Second pass: expand to the publications referenced in the first pass.
    cite_pubs = list(set(cite_pubs))  # de-duplicate before the batch fetch
    result = client.getPublicationsById(cite_pubs)
    for p in result.publications:
        if p.year <= 1970:
            continue
        item, children, parents, kt = extractPublication(p)
        # BUG FIX: the original tested `len(children) > 0` twice; one check
        # is equivalent. (If a `parents` check was intended, restore it.)
        if children:
            items.append(item)
            cite_pubs.extend(children)
            for k in kt:
                key_terms[k.lower()] += 1
                year_terms[p.year][k.lower()] += 1

    # Context manager guarantees the dump file is closed even on error.
    with open("pubs_dump.json", "w") as dump:
        dump.write(json.dumps(items))
def getCitationNetwork():
    """Collect the 'deep learning' citation network and write it out.

    Searches the data center, expands one hop along citation links, counts
    key terms globally and by publication year, and serializes the rows to
    pubs_dump.json.
    """
    import time
    import datetime
    import json
    from collections import defaultdict

    dc = DataCenterClient("tcp://10.1.1.211:32011")
    search_result = dc.searchPublications("deep learning")
    data_fields = [
        "id", "mid", "uid", "parent", "type", "t", "user_created_at",
        "followers_count", "statuses_count", "friends_count", "username",
        "text", "words", "verified", "emotion",
    ]
    items = []
    cite_pubs = []
    key_terms = defaultdict(int)
    year_terms = defaultdict(lambda: defaultdict(int))

    def tally(year, terms):
        # Count each key term globally and under its publication year.
        for term in terms:
            folded = term.lower()
            key_terms[folded] += 1
            year_terms[year][folded] += 1

    # Seed pass over the direct search hits (years <= 1970 are placeholders).
    for pub in search_result.publications:
        if pub.year <= 1970:
            continue
        item, kids, folks, kt = extractPublication(pub)
        if kids:
            items.append(item)
            cite_pubs.extend(kids)
            cite_pubs.extend(folks)
            tally(pub.year, kt)

    # Expansion pass over the de-duplicated citation neighborhood.
    cite_pubs = list(set(cite_pubs))
    fetched = dc.getPublicationsById(cite_pubs)
    for pub in fetched.publications:
        if pub.year <= 1970:
            continue
        item, kids, folks, kt = extractPublication(pub)
        if kids:
            items.append(item)
            cite_pubs.extend(kids)
            tally(pub.year, kt)

    sorted_key_terms = sorted(key_terms.items(), key=lambda kv: kv[1],
                              reverse=True)

    out = open("pubs_dump.json", "w")
    out.write(json.dumps(items))
    out.close()