Esempio n. 1
0
def _error_message(exc):
    """
    Build the text reported to the caller for a failed step.

    The docstring of the exception class is used when there is one. ``__doc__`` is not inherited by
    subclasses, so an exception class declared without its own docstring yields ``None``; in that case
    the exception's own message is used instead, and the class name if that message is empty.
    :param exc: the exception that interrupted the computation
    :type exc: Exception
    :return: a non-"None" description of the failure
    :rtype: str
    """
    doc = exc.__doc__
    if doc is not None:
        return str(doc)
    return str(exc) or type(exc).__name__


def main(csv_file, post_metadata):
    """
    Given a valid CSV file's path and a series of parameters given by the user via the web gui, execute the
    algorithm on the given input.
    On success the returned dict contains:
    - 'mtxtime': the interval measured while building the difference matrix, formatted with two decimals;
    - 'result': for each combination, the CSV dump of the discovered RFDs, keyed by the JSON encoding of the
      named combination;
    - 'timing': one formatted interval per combination, in the order the combinations were processed;
    - 'total': the formatted interval of the whole run.
    If building the difference matrix or processing any combination raises, the function stops at once and
    returns a dict holding only an 'error' message.
    :param csv_file: valid path to a CSV file
    :type csv_file: str
    :param post_metadata: dict containing the user's parameters
    :type post_metadata: werkzeug.datastructures.ImmutableMultiDict
    :return: a dict containing the output of each combination or with an error message
    :rtype: dict
    """
    with Timer() as total:
        params = param_to_dict(post_metadata)
        args = {
            'sep': params['separator'],
            'semantic': params['semantic'],
            'missing': params['missing'],
            'datetime': params['datetime'],
        }
        # The header option is forwarded only when the user supplied it.
        if 'header' in params:
            args['first_col_header'] = params['header']
        with Timer() as mtxtime:
            try:
                diff_mtx = DiffMatrix(csv_file, **args)
                labels = diff_mtx.get_labels()
            except Exception as e:
                return {"error": _error_message(e)}
        cols_count = ut.get_cols_count(csv_file, params['separator'])
        hss = extract_hss(cols_count, params['lhs'], params['rhs'])
        response = {
            'mtxtime': "{:.2f}".format(mtxtime.interval),
            'result': {},
            'timing': []
        }
        for combination in hss:
            with Timer() as c:
                try:
                    comb_dist_mtx = diff_mtx.split_sides(combination)
                    nd = RFDDiscovery(comb_dist_mtx)
                    r = nd.get_rfds(nd.standard_algorithm, combination)
                    # Move the column labelled 0 (presumably the RHS and the first column of the
                    # result -- confirm against get_rfds) to the end, so LHS columns come first.
                    rhs = r[[0]]
                    lhs = r.drop([r.columns[0]], axis=1)
                    result_df = pnd.concat([lhs, rhs], axis=1)
                    key = json.dumps(name_combination(labels, combination))
                    response['result'][key] = result_df.to_csv(sep=params['separator'])
                except Exception as e:
                    return {"error": _error_message(e)}
            # Read after the block is closed, like the other Timer intervals in this function.
            response['timing'].append("{:.2f}".format(c.interval))
    response['total'] = "{:.2f}".format(total.interval)
    return response
 def extract_args(self, args):
     """
     Given the list of command line parameters, it extracts the parameters given according to the format
     described in the Usage section of the README.
     No exception is propagated to the caller: every failure is reported on the standard output and the
     program ends through sys.exit:
     - '-v' prints the version and '--help' prints the usage text, both ending with status 0;
     - an option rejected by getopt prints the error and the usage text, ending with status 2;
     - a value that cannot be converted to an integer, more than one RHS attribute, or any error raised
       while deriving the separator, the header and the half sides specifications end with status -1.
     :param args: list of command line argument given at the startup
     :type args: list
     :return: the tuple (c_sep, csv_file, has_header, semantic, has_dt, missing, ic, human,
         half_sides_specifications)
     :rtype: tuple
     """
     try:
         # Default values
         c_sep, has_header, semantic, has_dt, missing, ic, human = '', 0, True, False, "?", False, False
         csv_file = ''
         left_half_side = []
         right_half_side = []
         options, args = getopt.getopt(args, "c:r:l:s:hm:d:vi:", ["semantic", "help", "human"])
         for option, arg in options:
             if option == '-v':
                 print("rdf-discovery version 0.0.1")
                 sys.exit(0)
             elif option == '-c':
                 csv_file = arg
             elif option == '-r':
                 # Split like '-l' so that several indexes are detected and rejected explicitly.
                 right_half_side = [int(_) for _ in arg.split(',')]
                 if len(right_half_side) > 1:
                     print("You can specify at most 1 RHS attribute")
                     sys.exit(-1)
             elif option == '-l':
                 left_half_side = [int(_) for _ in arg.split(',')]
             elif option == '-s':
                 c_sep = arg
             elif option == '-h':
                 # NOTE(review): 0 is also the default, so this option currently changes nothing and
                 # the "has_header is None" check below can never be true -- confirm the intended default.
                 has_header = 0
             elif option == '--semantic':
                 # NOTE(review): True is also the default, so this flag currently changes nothing.
                 semantic = True
             elif option == '-m':
                 missing = arg
             elif option == '-d':
                 has_dt = [int(_) for _ in arg.split(',')]
             elif option == '-i':
                 ic = int(arg)
             elif option == '--human':
                 human = True
             elif option == '--help':
                 self.usage()
                 sys.exit(0)
             else:
                 # Raised explicitly (not via assert) so the check is kept under "python -O".
                 raise AssertionError("unhandled option")
     except getopt.GetoptError as getopt_err:
         print(getopt_err)
         self.usage()
         sys.exit(2)
     except (TypeError, ValueError) as t_err:
         # int() raises ValueError for a non numeric string, TypeError for a non string argument.
         print("Error while trying to convert a string to numeric: {}".format(str(t_err)))
         sys.exit(-1)
     except Exception as ex:
         print("Error while trying to extract arguments: {}".format(str(ex)))
         sys.exit(-1)
     # understanding
     try:
         c_sep_, has_header_ = self.extract_sep_n_header(c_sep, csv_file, has_header)
         # Values detected from the file are used only where the user gave none.
         if c_sep == '':
             c_sep = c_sep_
         if has_header is None:
             has_header = has_header_
         cols_count = ut.get_cols_count(csv_file, c_sep)
         half_sides_specifications = self.extract_hss(cols_count, left_half_side, right_half_side)
     except Exception as ex:
         print("Error while trying to understand arguments: {}".format(str(ex)))
         sys.exit(-1)
     return c_sep, csv_file, has_header, semantic, has_dt, missing, ic, human, half_sides_specifications