def fillEntry_ref(self, arnumber, layer):
     """Record paper ``arnumber`` in ``self.data`` for the reference crawl.

     Does nothing when the paper already has an entry. Otherwise stores the
     result of ``getRefs(arnumber)`` under ``'refs'``, the result of
     ``index(arnumber)`` (the paper's "IEEE TERMS") under ``'keywords'`` and
     ``layer`` under ``'layer'``.

     The entry is assembled completely before it is stored. If either lookup
     raises, no half-filled entry is left behind; such a stub would make
     every later call skip the paper while its ``'refs'`` and ``'keywords'``
     are still missing.
     """
     if arnumber in self.data:
         return
     # Values are evaluated top to bottom: getRefs() first, then index().
     entry = {
         'refs': getRefs(arnumber),
         'keywords': index(arnumber),
         'layer': layer,
     }
     self.data[arnumber] = entry
 def getNextLayer_cite(self, curr_layer_lst):
     """Choose the citing papers that make up the next layer.

     Every paper listed under ``"cites"`` of a paper in ``curr_layer_lst`` is
     a candidate unless it already has an entry in ``self.data``. Candidates
     whose citation count (``len(getCites(candidate))``) is 10 or less are
     dropped. The others are scored as ``similarity * citation * repeat``:

     * similarity -- the largest number of keywords the candidate shares with
       any single current-layer paper that lists it,
     * citation -- the candidate's own citation count,
     * repeat -- how many times the candidate was reached from this layer.

     Returns a list with the IDs of the ``self.nodes_cite`` best-scoring
     candidates, highest score first. Each chosen ID and its three factors
     are printed.
     """
     candidates = {}
     # Candidates already found at or below the citation threshold.
     # Remembering them avoids another getCites() call each time a further
     # paper of the layer lists the same candidate (assumes getCites() gives
     # the same answer within one call of this method).
     rejected = set()
     for num in curr_layer_lst:
         parent = self.data[num]
         for cite in parent["cites"]:
             if cite in self.data or cite in rejected:
                 continue
             entry = candidates.get(cite)
             if entry is None:
                 citation = len(getCites(cite))
                 if citation <= 10:
                     rejected.add(cite)
                     continue
                 keywords = index(cite)
                 candidates[cite] = {
                     "citation": citation,
                     "keywords": keywords,
                     "similarity": len(set(parent["keywords"]) & set(keywords)),
                     "repeat": 1,
                 }
             else:
                 # Reached again: keep the best overlap, count the visit.
                 overlap = len(set(parent["keywords"]) & set(entry["keywords"]))
                 entry["similarity"] = max(entry["similarity"], overlap)
                 entry["repeat"] += 1
     score = {}
     for cite, entry in candidates.items():
         score[cite] = entry["similarity"] * entry["citation"] * entry["repeat"]
     ranked = sorted(score.items(), key=operator.itemgetter(1), reverse=True)
     # Clamp at zero so a negative nodes_cite yields an empty slice.
     limit = max(0, min(len(ranked), self.nodes_cite))
     result = []
     # Single-argument print(...) behaves the same on Python 2 and 3.
     for cite, _ in ranked[:limit]:
         entry = candidates[cite]
         result.append(cite)
         print(cite)
         print("%s %s %s" % (entry["similarity"], entry["citation"], entry["repeat"]))
     return result
 def getNextLayer_ref(self, curr_layer_lst):
     """Choose the referenced papers that make up the next layer.

     Every paper listed under ``"refs"`` of a paper in ``curr_layer_lst`` is
     a candidate unless it already has an entry in ``self.data``. Candidates
     whose citation count (``len(getCites(candidate))``) is 1 or less are
     dropped. The others are scored as ``similarity * citation * repeat``:

     * similarity -- the largest number of keywords the candidate shares with
       any single current-layer paper that references it,
     * citation -- the candidate's own citation count,
     * repeat -- how many times the candidate was reached from this layer.

     Returns a tuple ``(result, training_data)``. ``result`` lists the IDs of
     the ``self.nodes_ref`` best-scoring candidates, highest score first.
     ``training_data`` holds one ``[similarity, citation, repeat]`` list per
     surviving candidate (not only the chosen ones). Progress and the chosen
     papers are printed.
     """
     candidates = {}
     # Candidates already found at or below the citation threshold.
     # Remembering them avoids another getCites() call each time a further
     # paper of the layer references the same candidate (assumes getCites()
     # gives the same answer within one call of this method).
     rejected = set()
     # Single-argument print(...) behaves the same on Python 2 and 3.
     for num in curr_layer_lst:
         print("%d is processed" % num)
         parent = self.data[num]
         for ref in parent["refs"]:
             print("    refer paper %d is accessed" % ref)
             if ref in self.data or ref in rejected:
                 continue
             entry = candidates.get(ref)
             if entry is None:
                 citation = len(getCites(ref))
                 if citation <= 1:
                     rejected.add(ref)
                     continue
                 keywords = index(ref)
                 candidates[ref] = {
                     "citation": citation,
                     "keywords": keywords,
                     "similarity": len(set(parent["keywords"]) & set(keywords)),
                     "repeat": 1,
                 }
             else:
                 # Reached again: keep the best overlap, count the visit.
                 overlap = len(set(parent["keywords"]) & set(entry["keywords"]))
                 entry["similarity"] = max(entry["similarity"], overlap)
                 entry["repeat"] += 1
     score = {}
     training_data = []
     print(len(candidates))
     for ref, entry in candidates.items():
         # Candidate ref papers are scored by
         # similarity * citation * number of visits from the last layer.
         score[ref] = entry["similarity"] * entry["citation"] * entry["repeat"]
         training_data.append([entry["similarity"], entry["citation"], entry["repeat"]])
     ranked = sorted(score.items(), key=operator.itemgetter(1), reverse=True)
     # Clamp at zero so a negative nodes_ref yields an empty slice.
     limit = max(0, min(len(ranked), self.nodes_ref))
     result = []
     # The top nodes_ref papers are chosen for the next layer.
     for ref, _ in ranked[:limit]:
         entry = candidates[ref]
         result.append(ref)
         print(ref)
         print("%s %s %s" % (entry["similarity"], entry["citation"], entry["repeat"]))
     return result, training_data
 def fillEntry_ref(self, arnumber, layer):
     """Add paper ``arnumber`` to ``self.data`` for the reference crawl.

     A paper that already has an entry is left untouched. For a new paper
     the entry holds ``getRefs(arnumber)`` under ``"refs"``,
     ``index(arnumber)`` (the paper's "IEEE TERMS") under ``"keywords"`` and
     ``layer`` under ``"layer"``.

     The entry is only stored once both lookups have succeeded. Storing an
     empty entry first would leave a stub behind when a lookup raises, and
     that stub would make later calls skip the paper although its ``"refs"``
     and ``"keywords"`` were never filled in.
     """
     if arnumber in self.data:
         return
     # Values are evaluated top to bottom: getRefs() first, then index().
     entry = {
         "refs": getRefs(arnumber),
         "keywords": index(arnumber),
         "layer": layer,
     }
     self.data[arnumber] = entry
 def getNextLayer_ref(self, curr_layer_lst):
     """Select the next layer of the crawl from the current layer's references.

     Candidates are the papers found under ``'refs'`` of each paper in
     ``curr_layer_lst`` that have no entry in ``self.data`` yet. A candidate
     is kept only if ``len(getCites(candidate))`` is greater than 1. Each
     kept candidate gets the score ``similarity * citation * repeat``, where

     * similarity is the maximum keyword overlap with a single current-layer
       paper referencing it,
     * citation is the candidate's citation count,
     * repeat is the number of times it was reached from this layer.

     Returns ``(result, training_data)``: ``result`` is the list of the
     ``self.nodes_ref`` highest-scoring candidate IDs in descending score
     order, ``training_data`` has one ``[similarity, citation, repeat]``
     list for every kept candidate. Progress messages and the chosen papers
     are printed.
     """
     candidates = {}
     # IDs that already failed the citation threshold; looked up once only.
     # Without this, every further reference to such a paper would call
     # getCites() again (assumes getCites() is stable within one call here).
     rejected = set()
     # Single-argument print(...) behaves the same on Python 2 and 3.
     for num in curr_layer_lst:
         print("%d is processed" % num)
         parent = self.data[num]
         for ref in parent['refs']:
             print("    refer paper %d is accessed" % ref)
             if ref in self.data or ref in rejected:
                 continue
             entry = candidates.get(ref)
             if entry is None:
                 citation = len(getCites(ref))
                 if citation <= 1:
                     rejected.add(ref)
                     continue
                 keywords = index(ref)
                 candidates[ref] = {
                     'citation': citation,
                     'keywords': keywords,
                     'similarity': len(
                         set(parent['keywords']) & set(keywords)),
                     'repeat': 1,
                 }
             else:
                 # Seen before in this layer: keep the best overlap and
                 # count the additional visit.
                 overlap = len(
                     set(parent['keywords']) & set(entry['keywords']))
                 entry['similarity'] = max(entry['similarity'], overlap)
                 entry['repeat'] += 1
     score = {}
     training_data = []
     print(len(candidates))
     for ref, entry in candidates.items():
         # Candidate ref papers are scored by
         # similarity * citation * number of visits from the last layer.
         score[ref] = (entry['similarity'] * entry['citation']
                       * entry['repeat'])
         training_data.append(
             [entry['similarity'], entry['citation'], entry['repeat']])
     ranked = sorted(score.items(),
                     key=operator.itemgetter(1),
                     reverse=True)
     # Clamp at zero so a negative nodes_ref yields an empty slice.
     limit = max(0, min(len(ranked), self.nodes_ref))
     result = []
     # The top nodes_ref papers are chosen for the next layer.
     for ref, _ in ranked[:limit]:
         entry = candidates[ref]
         result.append(ref)
         print(ref)
         print("%s %s %s" % (entry['similarity'], entry['citation'],
                             entry['repeat']))
     return result, training_data
 def getNextLayer_cite(self, curr_layer_lst):
     """Select the next layer of the crawl from the current layer's citers.

     Candidates are the papers found under ``'cites'`` of each paper in
     ``curr_layer_lst`` that have no entry in ``self.data`` yet. A candidate
     is kept only if ``len(getCites(candidate))`` is greater than 10. Each
     kept candidate gets the score ``similarity * citation * repeat``, where

     * similarity is the maximum keyword overlap with a single current-layer
       paper listing it,
     * citation is the candidate's citation count,
     * repeat is the number of times it was reached from this layer.

     Returns the list of the ``self.nodes_cite`` highest-scoring candidate
     IDs in descending score order. Each chosen ID and its three factors are
     printed.
     """
     candidates = {}
     # IDs that already failed the citation threshold; looked up once only.
     # Without this, every further listing of such a paper would call
     # getCites() again (assumes getCites() is stable within one call here).
     rejected = set()
     for num in curr_layer_lst:
         parent = self.data[num]
         for cite in parent['cites']:
             if cite in self.data or cite in rejected:
                 continue
             entry = candidates.get(cite)
             if entry is None:
                 citation = len(getCites(cite))
                 if citation <= 10:
                     rejected.add(cite)
                     continue
                 keywords = index(cite)
                 candidates[cite] = {
                     'citation': citation,
                     'keywords': keywords,
                     'similarity': len(
                         set(parent['keywords']) & set(keywords)),
                     'repeat': 1,
                 }
             else:
                 # Seen before in this layer: keep the best overlap and
                 # count the additional visit.
                 overlap = len(
                     set(parent['keywords']) & set(entry['keywords']))
                 entry['similarity'] = max(entry['similarity'], overlap)
                 entry['repeat'] += 1
     score = {}
     for cite, entry in candidates.items():
         score[cite] = (entry['similarity'] * entry['citation']
                        * entry['repeat'])
     ranked = sorted(score.items(),
                     key=operator.itemgetter(1),
                     reverse=True)
     # Clamp at zero so a negative nodes_cite yields an empty slice.
     limit = max(0, min(len(ranked), self.nodes_cite))
     result = []
     # Single-argument print(...) behaves the same on Python 2 and 3.
     for cite, _ in ranked[:limit]:
         entry = candidates[cite]
         result.append(cite)
         print(cite)
         print("%s %s %s" % (entry['similarity'], entry['citation'],
                             entry['repeat']))
     return result
 def fillEntry_cite(self, arnumber, layer):
     """Record paper ``arnumber`` in ``self.data`` for the citation crawl.

     Does nothing when the paper already has an entry. Otherwise stores the
     result of ``getCites(arnumber)`` under ``'cites'``, the result of
     ``index(arnumber)`` under ``'keywords'`` and ``layer`` under
     ``'layer'``.

     The entry is assembled completely before it is stored. If either lookup
     raises, no half-filled entry is left behind; such a stub would make
     every later call skip the paper while its ``'cites'`` and ``'keywords'``
     are still missing.
     """
     if arnumber in self.data:
         return
     # Values are evaluated top to bottom: getCites() first, then index().
     entry = {
         'cites': getCites(arnumber),
         'keywords': index(arnumber),
         'layer': layer,
     }
     self.data[arnumber] = entry
 def fillEntry_cite(self, arnumber, layer):
     """Add paper ``arnumber`` to ``self.data`` for the citation crawl.

     A paper that already has an entry is left untouched. For a new paper
     the entry holds ``getCites(arnumber)`` under ``"cites"``,
     ``index(arnumber)`` under ``"keywords"`` and ``layer`` under
     ``"layer"``.

     The entry is only stored once both lookups have succeeded. Storing an
     empty entry first would leave a stub behind when a lookup raises, and
     that stub would make later calls skip the paper although its
     ``"cites"`` and ``"keywords"`` were never filled in.
     """
     if arnumber in self.data:
         return
     # Values are evaluated top to bottom: getCites() first, then index().
     entry = {
         "cites": getCites(arnumber),
         "keywords": index(arnumber),
         "layer": layer,
     }
     self.data[arnumber] = entry