def numeric_mapper(input_file=None, delimiter=None, weighted=None):
    """
    Replace the string values of an input file with numeric values.

    After the sanity check passes, the file is loaded with ``load_file``,
    a mapping reference is built by ``extract_nodes``, the mapping is applied
    by ``numeric_mapping`` (its duration is reported on the console) and the
    result is handed to ``create_output_file``.

    :param input_file: Input file path
    :param delimiter: Column separator
    :param weighted: yes/no, whether the file contains edge weights
    :return: None; the mapped data is passed to ``create_output_file``
    :raises SystemExit: with status 1 when the sanity check fails
    """
    # Bail out early so the main flow below stays flat
    if file_operations.sanity_check(input_file=input_file) != 1:
        print('Sanity check failed!', log_type='error', color='red')
        sys.exit(1)

    # Load the input
    column_headers = file_operations.generate_headers(weighted)
    target_file_name = file_operations.get_output_file(input_file)
    frame = load_file(input_file, delimiter, column_headers)
    print('Data cleanup complete!', color='green', log_type='info')

    # Build the mapping reference
    node_mapping = extract_nodes(frame)
    print('Numeric mapping reference creation complete!',
          color='green', log_type='info')

    # Apply the mapping and measure how long it takes
    started_at = time.time()
    clock_label = datetime.datetime.now().strftime("%H:%M:%S")
    print('Numeric mapping started at: {}'.format(clock_label),
          log_type='info')
    mapped_frame = numeric_mapping(frame, node_mapping)
    elapsed_seconds = time.time() - started_at

    # Report the duration as HH:MM:SS
    elapsed_label = time.strftime("%H:%M:%S", time.gmtime(elapsed_seconds))
    print('Elapsed time for mapping: ', log_type='info', end='')
    print('{}'.format(elapsed_label), color='cyan', text_format='bold')
    print('Numeric mapping complete!', color='green', log_type='info')

    create_output_file(mapped_frame, target_file_name)
def filter_columns(input_file=None, column_indexes=None, delimiter=None,
                   output_file=None):
    """
    Filter the columns of a delimited text file.

    A command is built by ``create_command`` from the arguments and then
    passed to ``create_output_file``.

    :param input_file: A file path to the raw data file
    :param column_indexes: Indexes of the columns that need to be filtered
                           out (index starts from 1)
    :param delimiter: Column separator in the input/output file; a single
                      space is used when this is None
    :param output_file: A file path where the output will be stored
    :return: None
    :raises SystemExit: with status 1 when the sanity check fails or no
                        command could be created
    """
    check_result = file_operations.sanity_check(input_file=input_file,
                                                column_indexes=column_indexes,
                                                delimiter=delimiter,
                                                output_file=output_file)
    if check_result != 1:
        print('Sanity check failed!', log_type='error', color='red')
        sys.exit(1)

    # Fall back to the default separator when the caller gave none
    separator = ' ' if delimiter is None else delimiter

    filter_command = create_command(input_file, column_indexes, separator,
                                    output_file)
    if not filter_command:
        print('There was an error in command creation!', log_type='error')
        sys.exit(1)

    create_output_file(filter_command)
def compose_snap_graph(input_file=None, delimiter=None, weighted=None):
    """
    Build a SNAP graph from an edge-list file.

    :param input_file: Input file path
    :param delimiter: Column separator in the file; a single space is used
                      when this is None
    :param weighted: Simple yes/no if the input file is weighted or not
                     (only forwarded to the sanity check)
    :return: the graph returned by ``snap.LoadEdgeList``
    :raises SystemExit: with status 1 when the sanity check fails
    """
    # Refuse to continue on input that does not pass the sanity check
    if file_operations.sanity_check(input_file, delimiter, weighted) != 1:
        print('Sanity check failed!', log_type='error', color='red')
        sys.exit(1)

    # Default (whitespace) separator when none was supplied
    separator = ' ' if delimiter is None else delimiter

    print('Creating SNAP graph.....', log_type='info')
    # Arguments: graph type, input file, source column, destination column,
    # separator.
    # NOTE: self-edges are kept; no self-edge removal is done here.
    return snap.LoadEdgeList(snap.PUNGraph, input_file, 0, 1, separator)
def run_infomap(input_file=None, delimiter=None, weighted=None, trials=None,
                output=None):
    """
    Run the infomap algorithm on an edge-list file and report the result.

    :param input_file: Input file with the edges of the graph
    :param delimiter: Field separator (only forwarded to the sanity check)
    :param weighted: are the edges weighted? (only forwarded to the sanity
                     check)
    :param trials: number of trials/runs used to find the communities
    :param output: whether a community file is written; it is written when
                   this is None, 'Yes', 'Y', 'y' or 'yes'
    :return: None; the community count is printed
    :raises SystemExit: with status 1 when the sanity check fails
    """
    # Stop right away when the input does not pass the sanity check
    if file_operations.sanity_check(input_file, delimiter, weighted) != 1:
        print('Sanity check failed!', log_type='error', color='red')
        sys.exit(1)

    # Find communities in the graph
    community_count, communities = infomap_find_communities(input_file,
                                                            trials)

    # Write the community file unless the caller opted out
    wants_file = output is None or output in ('Yes', 'Y', 'y', 'yes')
    if wants_file:
        community_file = file_operations.generate_output_filename(
            input_file, prefix='infomap')
        file_operations.create_community_file(communities, community_file)

    print('Total communities found with INFOMAP algorithm: ',
          color='green', log_type='info', end='')
    print('{}'.format(community_count), color='cyan', text_format='bold')
def compose_ntx_graph(input_file=None, delimiter=None, weighted=None):
    """
    Build a networkx graph from an edge-list file.

    ``nx.read_weighted_edgelist`` is used when ``file_operations.is_weighted``
    reports a weighted file, ``nx.read_edgelist`` otherwise. Node labels are
    read as ``int`` in both cases.

    :param input_file: Input file path
    :param delimiter: separator for the columns of the input file
    :param weighted: Simple yes/no if the input file is weighted or not
    :return: networkx graph
    :raises SystemExit: with status 1 when the sanity check fails or the
                        graph can not be created
    """
    # Both helpers are queried up front, before the sanity verdict is used
    check_result = file_operations.sanity_check(input_file, delimiter,
                                                weighted)
    weighted_flag = file_operations.is_weighted(weighted)

    if check_result != 1:
        print('Sanity check failed!', log_type='error', color='red')
        sys.exit(1)

    # Pick the messages that belong to the kind of graph being built
    if weighted_flag:
        use_weights = True
        progress_message = 'Creating Networkx weighted graph.....'
        failure_template = 'Can not create weighted networkx graph. ERROR: {}'
    else:
        use_weights = False
        progress_message = 'Creating Networkx unweighted graph.....'
        failure_template = ('Can not create unweighted networkx graph. '
                            'ERROR: {}')

    print(progress_message, log_type='info')
    try:
        if use_weights:
            reader = nx.read_weighted_edgelist
        else:
            reader = nx.read_edgelist
        graph = reader(input_file, delimiter=delimiter, nodetype=int)
    except Exception as error:
        # Any failure while reading the edge list is reported and is fatal
        print(failure_template.format(error), color='red', log_type='error')
        sys.exit(1)

    return graph
def clip_text(input_file=None, delimiter=None, start_date=None,
              interval=None):
    """
    Load an input file and clip it to a date window.

    The file is loaded with ``load_file`` and clipped with
    ``clip_data_frame``; the first three rows of the clipped result are
    printed.

    :param input_file: Input file to clip
    :param delimiter: Column separator for the input file
    :param start_date: Start date of clipping (dd-mm-YYYY)
    :param interval: for how many days (int); passed on as ``periods``
    :return: None. NOTE(review): the earlier documentation promised the
             clipped text and the rest of the text, but the visible code
             returns nothing - confirm the intended contract.
    """
    check_result = file_operations.sanity_check(input_file=input_file,
                                                delimiter=delimiter)
    if check_result != 1:
        # A failed sanity check ends the call without any message
        return None

    # Read the input, then clip it
    frame = load_file(input_file=input_file, delimiter=delimiter)
    clipped = clip_data_frame(data_frame=frame, start_date=start_date,
                              periods=interval)
    print(clipped.head(3))