예제 #1
0
def numeric_mapper(input_file=None, delimiter=None, weighted=None):
    """
    Map the strings of an input file to numeric values.

    The input is sanity-checked, loaded into a data frame, a mapping
    reference is extracted from it, the mapping is applied (and timed), and
    the mapped data frame is handed to ``create_output_file``.
    :param input_file: Input file path
    :param delimiter: Column separator
    :param weighted: yes/no if the file contains weights of the edges or not
    :return: None (the process exits with status 1 if the sanity check fails)
    """
    # Bail out early so the happy path below stays flat.
    if file_operations.sanity_check(input_file=input_file) != 1:
        print('Sanity check failed!', log_type='error', color='red')
        sys.exit(1)

    column_headers = file_operations.generate_headers(weighted)
    destination = file_operations.get_output_file(input_file)

    frame = load_file(input_file, delimiter, column_headers)
    print('Data cleanup complete!', color='green', log_type='info')

    node_lookup = extract_nodes(frame)
    print('Numeric mapping reference creation complete!',
          color='green', log_type='info')

    # Only the mapping step itself is timed.
    started_at = time.time()
    clock_text = datetime.datetime.now().strftime("%H:%M:%S")
    print('Numeric mapping started at: {}'.format(clock_text),
          log_type='info')
    mapped_frame = numeric_mapping(frame, node_lookup)
    elapsed_seconds = time.time() - started_at

    # Elapsed seconds are rendered as HH:MM:SS on the same line as the label.
    elapsed_text = time.strftime("%H:%M:%S", time.gmtime(elapsed_seconds))
    print('Elapsed time for mapping: ', log_type='info', end='')
    print('{}'.format(elapsed_text), color='cyan', text_format='bold')
    print('Numeric mapping complete!', color='green', log_type='info')

    create_output_file(mapped_frame, destination)
예제 #2
0
def filter_columns(input_file=None,
                   column_indexes=None,
                   delimiter=None,
                   output_file=None):
    """
    Filter text input depending on columns and delimiter.

    After the inputs pass the sanity check, a command is built with
    ``create_command`` and passed on to ``create_output_file``.
    :param input_file: A file path to raw data file
    :param column_indexes: Indexes of the columns that needs to be filtered out (index starts from 1)
    :param delimiter: Column separator in input/output file (a single space is used when None)
    :param output_file: A file path where the output will be stored
    :return: None (the process exits with status 1 on a failed sanity check
             or when no command could be created)
    """
    checks_passed = file_operations.sanity_check(
        input_file=input_file,
        column_indexes=column_indexes,
        delimiter=delimiter,
        output_file=output_file) == 1
    if not checks_passed:
        print('Sanity check failed!', log_type='error', color='red')
        sys.exit(1)

    # Fall back to a single space when the caller gave no delimiter.
    separator = ' ' if delimiter is None else delimiter

    command = create_command(input_file, column_indexes, separator,
                             output_file)
    if not command:
        print('There was an error in command creation!', log_type='error')
        sys.exit(1)

    create_output_file(command)
예제 #3
0
def compose_snap_graph(input_file=None, delimiter=None, weighted=None):
    """
    Build a snap graph from the edge list stored in a file.
    :param input_file:  Input file path
    :param delimiter: Column separator in the file (a single space is used when None)
    :param weighted: Simple yes/no if the input file is weighted or not
    :return: snap graph (the process exits with status 1 if the sanity check fails)
    """
    # NOTE(review): arguments are positional here -- confirm they line up
    # with sanity_check's parameter order.
    if file_operations.sanity_check(input_file, delimiter, weighted) != 1:
        print('Sanity check failed!', log_type='error', color='red')
        sys.exit(1)

    # Whitespace is the fallback separator.
    separator = ' ' if delimiter is None else delimiter

    print('Creating SNAP graph.....', log_type='info')
    # Arguments: graph type (snap.PUNGraph), input file, source column,
    # destination column, separator. Self-edges are not removed here.
    return snap.LoadEdgeList(snap.PUNGraph, input_file, 0, 1, separator)
예제 #4
0
def run_infomap(input_file=None, delimiter=None, weighted=None, trials=None, output=None):
    """
    Run the infomap algorithm on an edge file and report the community count.
    :param input_file: Input file with edges of the graph
    :param delimiter: Field separator
    :param weighted: are the edges weighted?
    :param trials: number of trials/run to find out community
    :param output: whether an output file is created; None, 'Yes', 'Y', 'y'
                   and 'yes' enable it, any other value disables it
    :return: None (the process exits with status 1 if the sanity check fails)
    """
    # Refuse to continue on bad input.
    if file_operations.sanity_check(input_file, delimiter, weighted) != 1:
        print('Sanity check failed!', log_type='error', color='red')
        sys.exit(1)

    # Detect communities straight from the input file.
    community_count, communities = infomap_find_communities(input_file, trials)

    # Writing the community file is opt-out: no value at all means "yes".
    wants_file = output is None or output in ('Yes', 'Y', 'y', 'yes')
    if wants_file:
        target_name = file_operations.generate_output_filename(input_file, prefix='infomap')
        file_operations.create_community_file(communities, target_name)

    print('Total communities found with INFOMAP algorithm: ', color='green', log_type='info', end='')
    print('{}'.format(community_count), color='cyan', text_format='bold')
예제 #5
0
def compose_ntx_graph(input_file=None, delimiter=None, weighted=None):
    """
    Build a networkx graph from the edge list stored in a file.

    Node labels are read as ``int``. A weighted or an unweighted reader is
    chosen from the result of ``file_operations.is_weighted(weighted)``.
    :param input_file: Input file path
    :param delimiter: separator for the column of the input file
    :param weighted: Simple yes/no if the input file is weighted or not
    :return: networkx graph (the process exits with status 1 if the sanity
             check fails or the graph can not be read)
    """
    # Both helpers run before the sanity decision is made.
    sanity_status = file_operations.sanity_check(input_file, delimiter,
                                                 weighted)
    use_weights = file_operations.is_weighted(weighted)

    if sanity_status != 1:
        print('Sanity check failed!', log_type='error', color='red')
        sys.exit(1)

    # The two modes differ only in their messages and the reader used.
    if use_weights:
        start_message = 'Creating Networkx weighted graph.....'
        failure_template = 'Can not create weighted networkx graph. ERROR: {}'
    else:
        start_message = 'Creating Networkx unweighted graph.....'
        failure_template = 'Can not create unweighted networkx graph. ERROR: {}'

    print(start_message, log_type='info')
    try:
        reader = nx.read_weighted_edgelist if use_weights else nx.read_edgelist
        return reader(input_file, delimiter=delimiter, nodetype=int)
    except Exception as error:
        # Any read failure is reported and ends the process.
        print(failure_template.format(error), color='red', log_type='error')
        sys.exit(1)
예제 #6
0
def clip_text(input_file=None, delimiter=None, start_date=None, interval=None):
    """
    This function controls the other functions
    :param input_file: Input file to clip
    :param delimiter: Column separator for input file
    :param start_date: Start date of clipping (dd-mm-YYYY)
    :param interval: for how many days (int)
    :return: clipped text, rest of the text
    """
    # Check sanity of the input file
    sanity_status = file_operations.sanity_check(input_file=input_file,
                                                 delimiter=delimiter)

    # If sanity check is passed, read and clip the text
    if sanity_status == 1:
        # Load input file
        data_frame = load_file(input_file=input_file, delimiter=delimiter)

        # Clip data frame
        clipped_text = clip_data_frame(data_frame=data_frame,
                                       start_date=start_date,
                                       periods=interval)
        print(clipped_text.head(3))