def graph_to_vector(g, metric, all_tokens):
    """Create a feature vector from a single graph.

    The list of *all_tokens* is used as the basis for the feature vector;
    each token's value is its centrality in graph *g* according to *metric*,
    and tokens that do not appear as nodes in the graph get 0.0.
    """
    cents = graph.centralities(g, metric)
    vector = [cents.get(token, 0.0) for token in all_tokens]
    return vector
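# Usage sketch (hypothetical names; the metric string is assumed to be
# whatever graph.centralities accepts, e.g. something like 'PageRank'):
#
#     all_tokens = sorted(vocabulary)              # shared basis across documents
#     vec = graph_to_vector(doc_graph, metric, all_tokens)
#     # vec[i] is the centrality of all_tokens[i] in doc_graph,
#     # or 0.0 if that token is not a node in the graph.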
def store_centralities(corpus, context):
    print '> Calculating and storing centralities for', corpus
    g = retrieve_corpus_network(corpus, context)
    metrics = graph_representation.get_metrics(True, exclude_flow=True)

    for metric in metrics:
        m = metric.split()[0]
        store_path = 'output/centralities/co-occurrence/'+corpus+'/'+context+'/'+m+'.cent'
        # Skip metrics whose centralities have already been pickled.
        if data.pickle_from_file(store_path, suppress_warning=True):
            print ' already present, skipping:', metric
            continue
        else:
            print ' calculating:', metric
        try:
            c = graph.centralities(g, metric)
            data.pickle_to_file(c, store_path)
        except MemoryError as e:
            print 'MemoryError :('
            data.write_to_file('MemoryError while calculating '+metric+' on '+corpus+':\n'+str(e)+'\n\n', 'output/log/errors')
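# Usage sketch (the corpus/context names below are hypothetical placeholders
# for the dataset identifiers used elsewhere in this project):
#
#     store_centralities('example_corpus', 'sentence')
#
# Results are pickled to output/centralities/co-occurrence/<corpus>/<context>/<metric>.cent,
# so re-running the function skips any metric that is already stored.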
def graph_to_dict(g, metric, icc=None):
    """Return node centrality values as a dictionary.

    If `icc` is provided, each term's centrality is multiplied by its ICC
    value, giving TC-ICC; otherwise plain TC is returned.
    """
    centralities = graph.centralities(g, metric)
    if icc:
        for term in centralities:
            try:
                centralities[term] = centralities[term] * icc[term]
            except KeyError as ke:
                # Since we iterate over the graph's own terms, a KeyError here
                # means the term is missing from icc, i.e. a mismatch between
                # the ICC dictionary and the document network.
                # TODO: should be cleaned up once the TC-ICC evaluation experiment is done
                print ' !', str(ke), 'found in graph centralities but not in icc'
                centralities[term] = 0.0
    return centralities
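# Usage sketch (hypothetical names): plain TC versus TC-ICC weighting.
# `some_icc` is assumed to be a {term: ICC value} dictionary computed
# elsewhere in this project.
#
#     tc = graph_to_dict(doc_graph, metric)                # plain term centrality
#     tc_icc = graph_to_dict(doc_graph, metric, some_icc)  # centrality * ICC per term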