Example #1
0
def test_trim_sheet_fail(sample_spreadsheet, trimmed_sample_spreadsheet, onset,
                         offset):
    """Trimming with an explicit column subset must not equal the reference sheet.

    The sample sheet is trimmed to ``onset``/``offset`` with only the
    "MomSpeech" and "MomObject" columns passed to ``pv.trim_sheet``; the
    result is expected to differ from the sheet loaded from
    ``trimmed_sample_spreadsheet``.
    """
    source_sheet = pv.load_opf(sample_spreadsheet)
    subset_trimmed = pv.trim_sheet(
        onset, offset, source_sheet, True, False, "MomSpeech", "MomObject")
    reference_sheet = pv.load_opf(trimmed_sample_spreadsheet)
    assert subset_trimmed != reference_sheet
Example #2
0
def test_df_to_csv(sample_spreadsheet):
    """Smoke-test ``to_df`` on the whole sheet and on the "MomSpeech" column.

    Despite the name, no CSV file is written and nothing is asserted: the
    test only checks that both conversions run, and logs the "MomSpeech"
    result.
    """
    spreadsheet = pv.load_opf(sample_spreadsheet)

    # Whole-sheet conversion; the result itself is not inspected.
    spreadsheet.to_df()

    momspeech_frame = spreadsheet.to_df("MomSpeech")
    log.info(momspeech_frame)
Example #3
0
def test_json(sample_spreadsheet):
    """Round-trip the sample sheet through JSON and compare the "MomSpeech" column.

    The sheet is saved to ``test.json`` in the current working directory,
    loaded back, and the reloaded "MomSpeech" column is checked for 20 cells
    whose values match the original cell by cell. The temporary file is
    removed even when an assertion fails.
    """
    json_path = "test.json"
    sheet = pv.load_opf(sample_spreadsheet)
    try:
        pv.save_json(sheet, json_path)
        json_sheet = pv.load_json(json_path)

        momspeech = json_sheet.get_column("MomSpeech")
        assert len(momspeech.sorted_cells()) == 20

        o_momspeech = sheet.get_column("MomSpeech")
        # zip() stops at the shorter input, so a missing cell would otherwise
        # go unnoticed by the pairwise comparison below.
        assert len(o_momspeech.cells) == len(momspeech.cells)
        for oc, c in zip(o_momspeech.cells, momspeech.cells):
            assert all(x == y for x, y in zip(oc.get_values(), c.get_values()))
    finally:
        # Cleanup must run on failure too; the file may not exist if
        # save_json itself raised.
        if os.path.exists(json_path):
            os.remove(json_path)
Example #4
0
def test_spreadsheet_to_df(sample_spreadsheet):
    """Check the row counts of the whole-sheet and "MomSpeech" DataFrames.

    The full sample sheet is expected to convert to 156 rows and the
    "MomSpeech" column alone to 20 rows. Both frames are logged with all
    columns displayed.
    """
    sheet = pv.load_opf(sample_spreadsheet)

    # option_context restores "display.max_columns" on exit, so this test
    # does not leak a changed pandas display setting into other tests.
    with pd.option_context("display.max_columns", None):
        df = sheet.to_df()
        log.info(df)
        assert len(df) == 156

        ms = sheet.to_df("MomSpeech")
        log.info(ms)
        assert len(ms) == 20
Example #5
0
def parseAndTrimOpf(opf_path, columns_list, onset, offset):
    """
    Copy an OPF file to ``<name>_cut.opf`` and trim the copy to a time window.

    The copy is loaded with ``pv.load_opf``. When ``columns_list`` is given,
    only the columns named in it are kept. Cells of every column whose name is
    not in the module-level ``_exception`` are filtered against the window;
    then the cells of all remaining columns are clamped to the window and
    shifted so that the window starts at 0. Columns left without cells are
    dropped and the sheet is saved over the copy.

    Args:
        opf_path: Path to the source OPF file. The file itself is not
            modified; all work happens on the ``_cut.opf`` copy.
        columns_list: Names of the columns to keep, or None to keep all.
        onset: Start of the window, in the same unit as the cell onsets.
        offset: End of the window, in the same unit as the cell offsets.

    Returns:
        The path of the trimmed ``_cut.opf`` file, or None when the loaded
        sheet has no columns (the untrimmed copy is left on disk in that
        case).
    """
    logger.info("Trim opf %s from %d to %d", opf_path, onset, offset)
    opf_file_orig = os.path.realpath(opf_path)
    opf_path_cut = os.path.splitext(opf_file_orig)[0] + '_cut.opf'
    copyfile(opf_file_orig, opf_path_cut)
    sheet = pv.load_opf(opf_path_cut)

    # get_column_list() returns a collection; comparing it directly with an
    # int raises TypeError on Python 3, so test its length instead.
    if len(sheet.get_column_list()) < 1:
        logger.error('OPF file is empty')
        return None

    if columns_list is not None:
        sheet.columns = {
            colname: col
            for (colname, col) in sheet.columns.items()
            if colname in columns_list
        }

    # Columns named in _exception keep all their cells; every other column
    # only keeps cells that satisfy one of the three cases below.
    # NOTE(review): a cell that covers the whole window (starts at or before
    # onset and ends after offset) matches none of the cases and is dropped;
    # confirm that this is intended.
    for colname, col in sheet.columns.items():
        if colname in _exception:
            continue
        col.cells = [
            cell for cell in col.cells
            # entirely inside the window
            if (cell.onset >= onset and cell.offset <= offset)
            # starts before the window and ends inside it
            or (cell.onset < onset < cell.offset <= offset)
            # starts inside the window and ends after it
            or (onset < cell.onset < offset < cell.offset)
        ]

    # Clamp every remaining cell to the window and rebase times so that the
    # window onset becomes 0. This also applies to the _exception columns.
    for col in sheet.columns.values():
        for cell in col.cells:
            cell.onset = max(max(cell.onset, onset) - onset, 0)
            cell.offset = max(min(cell.offset, offset) - onset, 0)

    # Drop columns that ended up with no cells.
    sheet.columns = {
        colname: col
        for (colname, col) in sheet.columns.items() if len(col.cells) > 0
    }

    pv.save_opf(sheet, opf_path_cut, *sheet.columns.keys())
    return opf_path_cut
Example #6
0
# Bookkeeping for a record of all OPF files processed; no need to change this.
# `fields` holds the column headers and `rows` starts empty.
# NOTE(review): nothing in this fragment fills `rows` or uses `fields`;
# presumably that happens later in the script — confirm.
fields = [
    "opf_original", "opf_trimmed", "onset_timestamp", "offset_timestamp",
    "onset_millis", "offset_millis"
]
rows = []

# Name template for the created OPF files; change the "_cut.opf" suffix here.
# NOTE(review): `file_cut` is not used in this fragment.
file_cut = "{}_cut.opf"

# Walk `source_folder` recursively and handle every file ending in ".opf".
# `source_folder`, `column_ref`, `cell_ref` and `columns_exception` are
# expected to be defined earlier in the script.
for root, dirs, files in os.walk(source_folder):
    for file in files:
        if file.endswith(".opf"):
            print("Loading file: {}".format(file))
            original_file = os.path.join(root, file)
            sheet = pv.load_opf(original_file)

            # Skip files for which get_column() returns None for the
            # reference column.
            col = sheet.get_column(column_ref)
            if col is None:
                continue

            # Collect the columns listed in `columns_exception` that exist in
            # this sheet, keyed by column name.
            exception = {}
            for colname in columns_exception:
                if colname in sheet.columns.keys():
                    exception[colname] = sheet.get_column(colname)

            # The reference cell's onset/offset are read here.
            # NOTE(review): indexing raises IndexError when the column has no
            # cell at position `cell_ref`.
            onset = col.cells[cell_ref].onset
            offset = col.cells[cell_ref].offset
            print("Found onset: {} offset: {} in {}".format(
                pv.to_timestamp(onset), pv.to_timestamp(offset), column_ref))
Example #7
0
def test_trim_sheet(sample_spreadsheet, trimmed_sample_spreadsheet, onset,
                    offset):
    """Trimming the sample sheet must reproduce the reference trimmed sheet.

    The sample sheet is trimmed to ``onset``/``offset`` without naming any
    columns and compared for equality with the sheet loaded from
    ``trimmed_sample_spreadsheet``.
    """
    source_sheet = pv.load_opf(sample_spreadsheet)
    actual = pv.trim_sheet(onset, offset, source_sheet, True, False)
    expected = pv.load_opf(trimmed_sample_spreadsheet)
    assert actual == expected
Example #8
0
def test_load_sample(sample_spreadsheet):
    """Loading the sample OPF yields 6 columns and 20 "MomSpeech" cells."""
    loaded = pv.load_opf(sample_spreadsheet)

    column_names = loaded.get_column_list()
    assert len(column_names) == 6

    momspeech_cells = loaded.get_column("MomSpeech").sorted_cells()
    assert len(momspeech_cells) == 20