def extract(raw_txt, logger):
    """Produce an extractive summary of *raw_txt*.

    Pipeline: clean the text into a sentence list, build a sentence
    similarity matrix, run PageRank over its graph, keep the top-ranked
    sentences (ratio shrinks as the document grows), and group them into
    paragraphs.

    Args:
        raw_txt: the raw input text to summarize.
        logger: a logging.Logger used for progress/debug output.

    Returns:
        str: the summary; each paragraph is sentences joined with '. '
        and paragraphs are separated by blank lines.
    """
    cleaner = Cleaner()
    cleaned_text_list = cleaner.clean(raw_txt)
    logger.info('Done cleaning')
    logger.debug(len(cleaned_text_list))
    logger.debug(cleaned_text_list)

    matrix_builder = MatrixBuilder()
    matrix = matrix_builder.build_sim_matrix(cleaned_text_list, logger)
    logger.info('Done building sim matrix')
    logger.debug('Dimensions: {}'.format(matrix.shape))
    logger.debug(matrix)

    g = Grapher()
    pageranks = g.graph(matrix)
    logger.info('Generated graph and got pageranks')
    logger.debug(pageranks)

    # Summary length is a decreasing fraction of the document size.
    # Bug fix: the original used `in range(0, 300)` / `range(301, 800)` /
    # `range(801, 1500)` buckets, whose gaps made sizes exactly 300, 800
    # and 1500 fall through to the smallest (0.05) ratio. Comparison
    # chains cover every size with no holes.
    total_doc_size = len(cleaned_text_list)
    if total_doc_size <= 300:
        summary_length = int(0.4 * total_doc_size)
    elif total_doc_size <= 800:
        summary_length = int(0.2 * total_doc_size)
    elif total_doc_size <= 1500:
        summary_length = int(0.1 * total_doc_size)
    else:
        summary_length = int(0.05 * total_doc_size)

    # Keep the highest-PageRank sentence indices, then restore document
    # order so the summary reads naturally.
    top_ranked = nlargest(summary_length, pageranks, key=pageranks.get)
    top_ranked.sort()

    cl = Cluster()
    top_ranked = cl.splitIntoParagraph(top_ranked, 7.5)
    logger.debug(top_ranked)

    # Build the result with a single join instead of repeated `+=`
    # (which is quadratic in the worst case). Trailing '\n\n' after the
    # final paragraph matches the original output exactly.
    parts = []
    for paragraph in top_ranked:
        for key in paragraph:
            parts.append('{}. '.format(cleaned_text_list[key]))
        parts.append('\n\n')
    result = ''.join(parts)

    # The original ended with `del` of every local inside a bare
    # `except: pass`; locals are released on return anyway, and a bare
    # except silently swallows real errors, so that block is removed.
    return result
# Bail out early if the input CSV is unreadable, then configure logging
# at the requested verbosity.
utils.exit_unless_accessible(args.calls)
utils.setup_logging(verbosity=args.verbose)
# Load graph database (remove duplicates)
df_all = df_from_csv_file(args.calls)
df = df_all.drop_duplicates()
from_fun, to_fun = args.from_function, args.to_function
# NOTE(review): search_settings presumably maps --direction/--cutoff to
# per-direction search parameters; falsy means "skip that direction" —
# confirm against its definition.
left, right = search_settings(args.direction, args.cutoff)
merge_on = ["caller_filename", "caller_function", "callee_filename", "callee_function"]
# Start each direction with an empty frame over the merge key columns so
# the concat below is well-formed even when a direction is disabled.
chains_df_right = pd.DataFrame(columns=merge_on)
if right:
    chains_df_right = find_chains_directed_df(df, from_fun, to_fun, right)
chains_df_left = pd.DataFrame(columns=merge_on)
if left:
    chains_df_left = find_chains_directed_df(df, from_fun, to_fun, left)
_LOGGER.info("Generating the results...")
# Combine both directions, drop rows found by both searches, then inner-
# merge against the full (pre-dedup) table to recover all original columns.
df_chains = pd.concat([chains_df_left, chains_df_right]).drop_duplicates()
df_chains = pd.merge(df_all, df_chains, on=merge_on, how='inner')
# .csv output goes straight to file; anything else is rendered as a graph.
if args.out.endswith(".csv"):
    df_to_csv_file(df_chains, args.out)
else:
    grapher = Grapher(args.out)
    grapher.graph(df_chains)
    grapher.render(args.out)
_LOGGER.info("Done")
min_value=0.0)  # NOTE(review): closes a st.number_input(...) call that begins before this chunk — do not edit in isolation
# Risk-aversion coefficient for the simulation's utility model —
# presumably CRRA; confirm against the Assumptions class.
assumptions.RRA = st.number_input('RRA', value=2.0, step=1.0, min_value=0.1)
# re initialize strategies based on user defined parameters
for i in range(len(strategies)):
    s = strategies[i].__class__(assumptions)
    strategies[i] = s
# Monte-carlo simulate all strategies under the chosen assumptions, then
# chart the aggregate results.
sim = Simulator()
df = sim.simulate(assumptions, strategies, runs=400)
gr = Grapher()
chart = gr.graph(df)
st.altair_chart(chart)
st.title('Asset Sim')
# NOTE(review): "aggegates" below is a typo ("aggregates") in user-facing
# markdown; it lives in a runtime string so it is left untouched here —
# fix in a behavior-changing commit. The triple-quoted literal is not
# terminated within this chunk; its closing quotes lie beyond view.
st.markdown(''' Asset Sim is a tool to visualize long term investing strategies. Quickly simulate different strategies and market assumptions to see how they affect your finances. ## FAQ ### How is the graph generated? A monte-carlo simulation simulates many runs with the given assumptions and aggegates them together. The lines are the median amounts of assets at a point in time. The error bands show first and third quartiles. ### Why median? Medians are less sensitive to outliers than means. A few lucky runs can blow up a mean. ### Where do the default values come from? They are best guesses based on historical data of the S&P 500.