Code example #1
0
    # --- tail of the command-line parsing: argv[1..10] are consumed above
    # this excerpt; job_path, feature_path, participant_file and table_file
    # are presumably assigned there — TODO confirm against the full file. ---
    feature_schema_file = sys.argv[
        11]  # full path of the file listing columns and types
    # for each feature file
    cleanup_flag = sys.argv[
        12]  # indicates if cleanup should be applied (flag != 0)
    argument_path = sys.argv[
        13]  # full path of the folder containing json files for
    # additional arguments for extracting features of each
    # table. It is assumed that file names are as follows:
    # xxxx-feature-additional.json where xxxx is the feature
    # name

    # Create the root folders for job files and extracted features
    # (0o744: owner rwx, group/other read-only).
    utils.createFolder(job_path, 0o744)
    utils.createFolder(feature_path, 0o744)

    # One result sub-folder per participant, named pid<id>.
    participants = utils.readList(participant_file)
    for id in participants:  # NOTE(review): `id` shadows the builtin
        # create folder to store extracted features for each participant id
        result_dir = '{result_directory}/pid{pid}'.format(
            result_directory=feature_path, pid=id)
        utils.createFolder(result_dir, 0o744)

    # One job sub-folder per table.
    tables = utils.readList(table_file)
    for table in tables:
        job_dir = '{job_directory}/{tbl}'.format(job_directory=job_path,
                                                 tbl=table)
        utils.createFolder(job_dir, 0o744)

    # Cartesian product of participants x tables; the loop body continues
    # beyond this excerpt.
    for id in participants:
        for table in tables:
            # create the job for features associated with each device
Code example #2
0
        2]  # full path of the folder where *.job files are stored
    # NOTE(review): this excerpt begins mid-statement — the line above is the
    # tail of a cut `... = sys.argv[` subscript; argv[1] and the assignments
    # of job_path and table_file presumably sit above it — TODO confirm.
    python_cmd = sys.argv[
        3]  # python command to run the count aggregation script
    code_path = sys.argv[
        4]  # full static path to where the count aggregation code is
    result_path = sys.argv[
        5]  # full path of the result root to store aggregate counts for each table
    table_path = sys.argv[
        6]  # full path of table data root containing sub-folders for each
    # pid and *.txt files for each counts per participant per table
    # (fine if relative to code base)
    participant_file = sys.argv[
        7]  # full path of the csv file listing 3-digit zero-padded
    # participant id's
    start_date = sys.argv[8]  # lower end of the date range to aggregate counts
    end_date = sys.argv[9]  # upper end of the date range to aggregate counts

    # Create the root folders for jobs and results
    # (0o744: owner rwx, group/other read-only).
    utils.createFolder(job_path, 0o744)
    utils.createFolder(result_path, 0o744)

    # Emit one <table>.job file per table; jobContent builds the job text
    # that aggregates counts across all participants for that table.
    tables = utils.readList(table_file)
    for table in tables:

        # create the job for aggregating counts across participants for table
        job_content = jobContent(code_path, python_cmd, table_path, table,
                                 participant_file, start_date, end_date,
                                 result_path)
        job_file = '{job_directory}/{tbl}.job'.format(job_directory=job_path,
                                                      tbl=table)
        utils.createJob(job_file, job_content)
Code example #3
0
    table_path = sys.argv[
        1]  # full path of the folder containing *.txt count data per participant
    table_name = sys.argv[2]  # name of the table
    participant_file = sys.argv[
        3]  # full path of the file listing zero-padded 3-digit participant id's
    start_date = sys.argv[4]  # starting date to aggregate counts
    end_date = sys.argv[5]  # end date to aggregate counts
    result_path = sys.argv[
        6]  # full path of the folder to store counts aggregated across participants
    # for the given table
    # NOTE(review): result_path is not used within this excerpt; the frame
    # built below is presumably written there after the cut — TODO confirm.

    # Seed a one-column frame with every calendar date in the (inclusive)
    # start..end range; per-participant counts are joined onto it below.
    dates = pd.DataFrame(data=pd.date_range(start=start_date, end=end_date),
                         columns=['date'])

    participants = utils.readList(participant_file)
    for id in participants:  # NOTE(review): `id` shadows the builtin
        # Expected layout: <table_path>/pid<id>/<table_name>.txt — a
        # tab-separated file with 'date' and 'record_num' columns.
        table_file = '{0}/pid{1}/{2}.txt'.format(table_path, id, table_name)
        data = pd.read_csv(table_file,
                           header=0,
                           dtype={
                               'date': str,
                               'record_num': int
                           },
                           parse_dates=['date'],
                           sep='\t',
                           lineterminator='\n',
                           encoding="ISO-8859-1")
        # Left-join keeps every date of the range (days the participant has
        # no record become NaN), then the count column is renamed to the
        # participant id — so `dates` grows one column per participant.
        dates = pd.merge(dates, data, on='date', how='left')
        dates.rename(columns={'record_num': id}, inplace=True)