コード例 #1
0
ファイル: extractor.py プロジェクト: tppolkow/brevity
    def extract(raw_txt, logger):
        """Build an extractive summary of ``raw_txt``.

        The input is cleaned into an indexable collection of entries
        (presumably sentences), a similarity matrix is built over them,
        ``Grapher.graph`` scores the entries, and the highest-scoring ones
        are emitted in ascending key order, grouped into paragraphs by
        ``Cluster.splitIntoParagraph``.

        Args:
            raw_txt: Raw input handed to ``Cleaner.clean``.
            logger: Logger used for progress (``info``) and diagnostic
                (``debug``) output; it is also passed on to
                ``MatrixBuilder.build_sim_matrix``.

        Returns:
            A string in which every selected entry is followed by ``". "``
            and every paragraph is terminated by a blank line.
        """
        cleaner = Cleaner()
        cleaned_text_list = cleaner.clean(raw_txt)

        logger.info('Done cleaning')
        logger.debug(len(cleaned_text_list))
        logger.debug(cleaned_text_list)

        matrix = MatrixBuilder().build_sim_matrix(cleaned_text_list, logger)

        logger.info('Done building sim matrix')
        logger.debug('Dimensions: {}'.format(matrix.shape))
        logger.debug(matrix)

        pageranks = Grapher().graph(matrix)

        logger.info('Generated graph and got pageranks')
        logger.debug(pageranks)

        # The share of the document kept shrinks as the document grows.
        # Inclusive upper bounds are used on purpose: the previous
        # ``range(a, b)`` membership tests excluded ``b``, so sizes of exactly
        # 300, 800 and 1500 fell through to the 5% branch.
        total_doc_size = len(cleaned_text_list)
        if total_doc_size <= 300:
            ratio = 0.4
        elif total_doc_size <= 800:
            ratio = 0.2
        elif total_doc_size <= 1500:
            ratio = 0.1
        else:
            ratio = 0.05
        summary_length = int(ratio * total_doc_size)

        # Pick the best-scoring keys, then restore ascending key order.
        top_ranked = sorted(
            nlargest(summary_length, pageranks, key=pageranks.get))

        top_ranked = Cluster().splitIntoParagraph(top_ranked, 7.5)
        logger.debug(top_ranked)

        # Join once instead of growing a string with repeated ``+=``.
        paragraphs = []
        for paragraph in top_ranked:
            body = ''.join(
                '{}. '.format(cleaned_text_list[key]) for key in paragraph)
            paragraphs.append(body + '\n\n')

        # No explicit ``del`` of the locals is needed: they are released when
        # the function returns.
        return ''.join(paragraphs)
コード例 #2
0
    # NOTE(review): presumably aborts when the calls file cannot be read —
    # confirm against utils.exit_unless_accessible.
    utils.exit_unless_accessible(args.calls)
    utils.setup_logging(verbosity=args.verbose)

    # Load graph database (remove duplicates)
    df_all = df_from_csv_file(args.calls)
    df = df_all.drop_duplicates()

    from_fun, to_fun = args.from_function, args.to_function
    # One setting per search direction; a falsy value skips that direction's
    # search below.
    left, right = search_settings(args.direction, args.cutoff)

    # Columns used both for the empty placeholder frames and as the merge key.
    merge_on = ["caller_filename", "caller_function", "callee_filename", "callee_function"]
    # Start from an empty frame so the concat below still works when this
    # direction is not searched.
    chains_df_right = pd.DataFrame(columns=merge_on)
    if right:
        chains_df_right = find_chains_directed_df(df, from_fun, to_fun, right)

    # Same placeholder-then-search pattern for the other direction.
    chains_df_left = pd.DataFrame(columns=merge_on)
    if left:
        chains_df_left = find_chains_directed_df(df, from_fun, to_fun, left)

    _LOGGER.info("Generating the results...")
    # Combine both directions, then inner-join against the non-deduplicated
    # frame so every original row matching a chain entry on merge_on is kept.
    df_chains = pd.concat([chains_df_left, chains_df_right]).drop_duplicates()
    df_chains = pd.merge(df_all, df_chains, on=merge_on, how='inner')
    # The output format is chosen by file extension: ".csv" is written as CSV,
    # anything else goes through Grapher.
    if args.out.endswith(".csv"):
        df_to_csv_file(df_chains, args.out)
    else:
        grapher = Grapher(args.out)
        grapher.graph(df_chains)
        grapher.render(args.out)

    _LOGGER.info("Done")
コード例 #3
0
ファイル: main.py プロジェクト: platers/asset-sim
                                               min_value=0.0)
    # Numeric input for RRA: starts at 2.0, moves in steps of 1.0, and cannot
    # go below 0.1.
    # NOTE(review): RRA presumably stands for relative risk aversion — confirm.
    assumptions.RRA = st.number_input('RRA',
                                      value=2.0,
                                      step=1.0,
                                      min_value=0.1)

# Rebuild every strategy from its own class so that each instance is
# constructed with the current assumptions; the list is updated in place.
for i, previous in enumerate(strategies):
    s = previous.__class__(assumptions)
    strategies[i] = s

# Run the simulation (400 runs) over the refreshed strategies.
sim = Simulator()
df = sim.simulate(assumptions, strategies, runs=400)

# Turn the simulation output into a chart and display it, then the title.
gr = Grapher()
chart = gr.graph(df)
st.altair_chart(chart)
st.title('Asset Sim')

st.markdown('''
    Asset Sim is a tool to visualize long term investing strategies. Quickly simulate different strategies and market assumptions to see how they affect your finances.
    ## FAQ
    ### How is the graph generated?
    A monte-carlo simulation simulates many runs with the given assumptions and aggregates them together. 
    The lines are the median amounts of assets at a point in time. The error bands show first and third quartiles.

    ### Why median?
    Medians are less sensitive to outliers than means. A few lucky runs can blow up a mean.

    ### Where do the default values come from?
    They are best guesses based on historical data of the S&P 500.