コード例 #1
0
def graph_to_vector(g, metric, all_tokens):
    """Build a feature vector for graph *g*.

    Each entry corresponds to a token in *all_tokens*; its value is that
    node's centrality in *g* under *metric*, or 0.0 when the token does
    not appear in the graph.
    """
    token_scores = graph.centralities(g, metric)
    return [token_scores.get(token, 0.0) for token in all_tokens]
コード例 #2
0
def graph_to_vector(g, metric, all_tokens):
    """Turn a single graph into a feature vector.

    *all_tokens* defines the vector basis: component i holds the
    *metric* centrality of all_tokens[i] in *g*, defaulting to 0.0 for
    tokens absent from the graph.
    """
    scores = graph.centralities(g, metric)
    vector = []
    for word in all_tokens:
        vector.append(scores.get(word, 0.0))
    return vector
コード例 #3
0
def store_centralities(corpus, context):
    print '> Calculating and storing centralities for', corpus
    g = retrieve_corpus_network(corpus, context)
    metrics = graph_representation.get_metrics(True, exclude_flow=True)

    for metric in metrics:
        m = metric.split()[0]
        store_path = 'output/centralities/co-occurrence/'+corpus+'/'+context+'/'+m+'.cent'
        if data.pickle_from_file(store_path, suppress_warning=True):
            print '    already present, skipping:', metric
            continue
        else:
            print '    calculating:', metric
        try:
            c = graph.centralities(g, metric)
            data.pickle_to_file(c, store_path)
        except MemoryError as e:
            print 'MemoryError :('
            data.write_to_file('MemoryError while claculating '+metric+' on '+corpus+':\n'+str(e)+'\n\n', 'output/log/errors')
コード例 #4
0
def store_centralities(corpus, context):
    print '> Calculating and storing centralities for', corpus
    g = retrieve_corpus_network(corpus, context)
    metrics = graph_representation.get_metrics(True, exclude_flow=True)

    for metric in metrics:
        m = metric.split()[0]
        store_path = 'output/centralities/co-occurrence/' + corpus + '/' + context + '/' + m + '.cent'
        if data.pickle_from_file(store_path, suppress_warning=True):
            print '    already present, skipping:', metric
            continue
        else:
            print '    calculating:', metric
        try:
            c = graph.centralities(g, metric)
            data.pickle_to_file(c, store_path)
        except MemoryError as e:
            print 'MemoryError :('
            data.write_to_file(
                'MemoryError while claculating ' + metric + ' on ' + corpus +
                ':\n' + str(e) + '\n\n', 'output/log/errors')
コード例 #5
0
def graph_to_dict(g, metric, icc=None):
    """Return node values as dictionary

    If `icc` is provided, values are TC-ICC, otherwise TC is calculated.
    """
    import pprint as pp
    centralities = graph.centralities(g, metric)
    if icc:
        for term in centralities:
            try:
                centralities[term] = centralities[term] * icc[term]
            except KeyError as ke:
                # excepting for this to detect possible missmatch between icc and doc network
                # TODO: should be cleaned up once tc-icc eval exp is done
                if term not in centralities and term in icc:
                    print '    !', str(ke), 'found in icc but not in graph centralities'
                else:
                    print '    !', str(ke), 'found in graph centralities but not in icc'
                centralities[term] = 0.0

    return centralities
コード例 #6
0
def graph_to_dict(g, metric, icc=None):
    """Return node values as dictionary

    If `icc` is provided, values are TC-ICC, otherwise TC is calculated.
    """
    import pprint as pp
    centralities = graph.centralities(g, metric)
    if icc:
        for term in centralities:
            try:
                centralities[term] = centralities[term] * icc[term]
            except KeyError as ke:
                # excepting for this to detect possible missmatch between icc and doc network
                # TODO: should be cleaned up once tc-icc eval exp is done
                if term not in centralities and term in icc:
                    print '    !', str(
                        ke), 'found in icc but not in graph centralities'
                else:
                    print '    !', str(
                        ke), 'found in graph centralities but not in icc'
                centralities[term] = 0.0

    return centralities