Example #1
def backup_files(self, backup_subfolder):
    backup_path = os.path.join(self.backup_folder, backup_subfolder)
    if not os.path.isfile(
            get_backup_file_path(backup_path, self.scene_folder,
                                 self.SCENE_OBJECTS_FILE)):
        pbar = ProgressBar([self.SCENE_OBJECTS_FILE],
                           title="backup " + self.SCENE_OBJECTS_FILE)
        backup_file(backup_path,
                    self.scene_folder,
                    self.SCENE_OBJECTS_FILE,
                    pbar=pbar)
Example #2
def combine_wavs_batch(audio_paths, method, **kargv):
    audio_paths.sort()
    method = method.lower()

    if method == "librosa":
        fn = partial(split_on_silence_with_librosa, **kargv)
    elif method == "pydub":
        fn = partial(split_on_silence_with_pydub, **kargv)

    parallel_run(fn, audio_paths,
            desc="Split on silence", parallel=False)

    audio_path = audio_paths[0]
    spl = os.path.basename(audio_path).split('.', 1)
    prefix = os.path.dirname(audio_path)+"/"+spl[0]+"."
    in_ext = audio_path.rsplit(".", 1)[-1]

    data = load_json(config.alignment_path, encoding="utf8")

    #print(data)

    # NOTE: wavs, paths and silence are defined outside this excerpt (they are
    # populated by the split step above); the segments appear to be pydub
    # AudioSegments, so len() below is a clip length in milliseconds.
    for i in range(len(wavs)-1):
        if len(wavs[i]) > 15000:
            continue
        if not paths[i] in data:
            continue

        sum = len(wavs[i])
        filename = prefix + str(i).zfill(4) + "."
        asr = data[paths[i]] + " "
        concated = wavs[i]
        for j in range(i+1, len(wavs)):
            sum += len(wavs[j])
            sum += 400
            if sum > 15000:
                break
            if not paths[j] in data:
                break
            filename = filename + str(j).zfill(4) + "."
            asr = asr + data[paths[j]] + " "
            concated = concated + silence + wavs[j]
            final_fn = filename + "wav"
            data[final_fn] = asr
            concated.export(final_fn, format="wav")
            print(filename + "wav | " + str(len(concated)))

    if os.path.exists(config.alignment_path):
        backup_file(config.alignment_path)

    write_json(config.alignment_path, data)
    get_durations(data.keys(), print_detail=False)
    return 0
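The concatenation in Example #2 relies on pydub semantics: AudioSegment objects join end-to-end with "+", len() gives the duration in milliseconds, and the silence spacer assumed by the loop can be built with AudioSegment.silent. A minimal sketch with hypothetical file names:

from pydub import AudioSegment

# Hypothetical inputs; the real script gets these from the split-on-silence step.
wavs = [AudioSegment.from_wav("clip.0000.wav"),
        AudioSegment.from_wav("clip.0001.wav")]
silence = AudioSegment.silent(duration=400)   # 400 ms gap, matching "sum += 400"

concated = wavs[0] + silence + wavs[1]        # segments concatenate with "+"
print(len(concated))                          # duration in milliseconds
concated.export("clip.0000.0001.wav", format="wav")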
Example #3
def get_output_base_path(load_path, eval_dirname="eval"):
    if not os.path.isdir(load_path):
        base_dir = os.path.dirname(load_path)
    else:
        base_dir = load_path

    base_dir = os.path.join(base_dir, eval_dirname)
    if os.path.exists(base_dir):
        backup_file(base_dir)
    makedirs(base_dir)

    m = re.compile(r'.*?\.ckpt\-([0-9]+)').match(load_path)
    base_path = os.path.join(base_dir,
            'eval-%d' % int(m.group(1)) if m else 'eval')
    return base_path
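For reference, the regex in Example #3 pulls the global step out of a TensorFlow-style checkpoint path, so the eval prefix carries that step number; the hypothetical paths below illustrate both branches of the ternary:

import re

pattern = re.compile(r'.*?\.ckpt\-([0-9]+)')

m = pattern.match('logs/model.ckpt-43000')   # hypothetical checkpoint path
print(m.group(1))                            # '43000' -> base name 'eval-43000'
print(pattern.match('logs/latest'))          # None   -> falls back to plain 'eval'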
Example #4
def _roll_back(change, settings):
    update_id = change['update_id']
    tmp_dir = path.join('/tmp', update_id)
    logger.info("Sanitizing [{}]".format(tmp_dir))
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
    os.mkdir(tmp_dir)
    origin_zip = path.join(utils.sanitize_path(settings['backup_dir']),
                           change['change_id'] + '.zip')

    logger.info("Start unzip...")
    utils.unzip_file(tmp_dir, origin_zip)
    logger.info("Unzip finished.")

    pack_file = path.join(tmp_dir, "package-info.json")
    logger.info("reading package-info.json from [{}]".format(pack_file))

    with open(pack_file, 'r') as f:
        content = f.read()
    package = json.loads(content)

    logger.info("creating rollback change...")
    roll_back_change = create_roll_back_change(change, package)
    logger.info("creation of rollback change finished")

    logger.info("start backing up files...")
    backup_location = utils.backup_file(roll_back_change, settings)
    logger.info("back up files finished")
    for file in roll_back_change['files_to_update']:
        logger.info("Performing rollback for file [{}]...".format(file))
        utils.do_single_file_move(tmp_dir, file, backup_location)
        logger.info("Performing rollback for file finished. [{}]".format(file))

    return roll_back_change
Example #5
def __install_update(change, settings):
    logger.info("Backing up files for change [{}]".format(change['change_id']))
    backup_tmp_dir = utils.backup_file(change, settings)
    logger.info("backed up file is in [{}]".format(backup_tmp_dir))
    tmp_loc = '/tmp/' + change['change_id']
    logger.info("sanitizing [{}]".format(tmp_loc))
    if os.path.exists(tmp_loc):
        shutil.rmtree(tmp_loc)
    os.mkdir(tmp_loc)
    logger.info("Unzipping file...")
    utils.unzip_file(
        tmp_loc,
        os.path.join(utils.sanitize_path(settings['download_dir']),
                     change['update_id'], change['package_name']))
    logger.info("Unzip finished.")
    for file in change['files_to_update']:
        logger.info("Apply change for file [{}]".format(file))
        utils.do_single_file_move(tmp_loc, file, backup_tmp_dir)
    logger.info("cleaning [{}]".format(tmp_loc))
    shutil.rmtree(tmp_loc)
    logger.info("cleaning [{}]".format(backup_tmp_dir))
    shutil.rmtree(backup_tmp_dir)
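Both helpers above rely on project-specific utilities (utils.backup_file, utils.unzip_file, utils.do_single_file_move) whose bodies are not part of this listing. As a rough sketch only, an unzip_file with the same (destination, archive) argument order could be as simple as:

import zipfile

def unzip_file(dest_dir, zip_path):
    # Hypothetical stand-in for utils.unzip_file; the real helper may add
    # validation or path sanitizing on top of a plain extraction.
    with zipfile.ZipFile(zip_path, 'r') as zf:
        zf.extractall(dest_dir)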
Example #6
    return results


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--audio_pattern', required=True)
    parser.add_argument('--recognition_filename', default="recognition.json")
    parser.add_argument('--sample_rate', default=16000, type=int)
    parser.add_argument('--pre_silence_length', default=1, type=int)
    parser.add_argument('--post_silence_length', default=1, type=int)
    parser.add_argument('--max_duration', default=60, type=int)
    config, unparsed = parser.parse_known_args()

    audio_dir = os.path.dirname(config.audio_pattern)

    for tmp_path in glob(os.path.join(audio_dir, "*.tmp.*")):
        remove_file(tmp_path)

    paths = glob(config.audio_pattern)
    paths.sort()
    results = text_recognition_batch(paths, config)

    base_dir = os.path.dirname(audio_dir)
    recognition_path = \
            os.path.join(base_dir, config.recognition_filename)

    if os.path.exists(recognition_path):
        backup_file(recognition_path)

    write_json(recognition_path, results)
Example #7
        results.update(item)

    found_count = sum([type(value) == str for value in results.values()])
    print(" [*] # found: {:.5f}% ({}/{})".format(
        100 * len(results) / len(data), len(results), len(data)))
    print(" [*] # exact match: {:.5f}% ({}/{})".format(
        100 * found_count / len(items), found_count, len(items)))

    return results


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--recognition_path', required=True)
    parser.add_argument('--alignment_filename', default="alignment.json")
    parser.add_argument('--score_threshold', default=0.4, type=float)
    parser.add_argument('--recognition_encoding', default='utf-8')
    config, unparsed = parser.parse_known_args()

    results = align_text_batch(config)

    base_dir = os.path.dirname(config.recognition_path)
    alignment_path = \
            os.path.join(base_dir, config.alignment_filename)

    if os.path.exists(alignment_path):
        backup_file(alignment_path)

    write_json(alignment_path, results)
    duration = get_durations(results.keys(), print_detail=False)
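Examples #6 and #7 share the same guard: if the target JSON already exists, back it up with the single-argument backup_file(path) before overwriting it. That helper's implementation is not shown here; a minimal sketch, assuming it simply renames the old file with a timestamp suffix:

import shutil
from datetime import datetime

def backup_file(path):
    # Hypothetical sketch; the project's real helper may behave differently.
    suffix = datetime.now().strftime('%Y%m%d-%H%M%S')
    backup_path = '{}.{}.backup'.format(path, suffix)
    shutil.move(path, backup_path)
    return backup_path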
Example #8
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--audio_pattern', required=True)
    parser.add_argument('--alignment_path', required=True)
    parser.add_argument('--out_ext', default='wav')
    parser.add_argument('--method',
                        choices=['librosa', 'pydub'],
                        required=True)
    config = parser.parse_args()

    data = load_json(config.alignment_path, encoding="utf8")

    audio_paths = glob(config.audio_pattern)

    for path in audio_paths:
        single_path = path.replace('.wav', '_s??.wav')
        single_paths = glob(single_path)
        combine_wavs_batch(
            single_paths,
            config.method,
            out_ext=config.out_ext,
        )
        wavs = []
        paths = []

    if os.path.exists(config.alignment_path):
        backup_file(config.alignment_path)

    write_json(config.alignment_path, data)
    get_durations(data.keys(), print_detail=False)
Example #9
def backup_files(self, backup_path, dry_mode=False, pbar=None):
    backup_file(backup_path, self.folder, self.dbf_file_name, dry_mode=dry_mode, pbar=pbar)
    backup_file(backup_path, self.folder, self.shp_file_name, dry_mode=dry_mode, pbar=pbar)
    backup_file(backup_path, self.folder, self.shx_file_name, dry_mode=dry_mode, pbar=pbar)
    self.backup_file(backup_path, dry_mode=dry_mode, pbar=pbar)

def backup_file(self, backup_path, dry_mode=False, pbar=None):
    if self.optimization_in_progress:
        return

    backup_file(backup_path, self.folder, self.model_file, dry_mode=dry_mode, pbar=pbar)

def backup_file(self, backup_path, dry_mode=False, pbar=None):
    backup_file(backup_path,
                self.folder,
                self.definition_file,
                dry_mode=dry_mode,
                pbar=pbar)
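Examples #1 and #9 call a different, multi-argument backup_file(backup_path, folder, file_name, ...). Its implementation is not included in this listing either; a minimal sketch under the assumption that it copies folder/file_name into backup_path and honors dry_mode:

import os
import shutil

def backup_file(backup_path, folder, file_name, dry_mode=False, pbar=None):
    # Hypothetical sketch matching the call sites above; the real helper
    # presumably also advances the pbar progress bar it is given.
    src = os.path.join(folder, file_name)
    dst = os.path.join(backup_path, file_name)
    if not dry_mode:
        os.makedirs(backup_path, exist_ok=True)
        shutil.copy2(src, dst)
    return dst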
Example #12
def main():
    # data downloaded from ftp://ftp.ncbi.nih.gov/pub/taxonomy/
    args = parse_args()
    nodes_df = load_nodes(args.nodes_file)
    names_df = load_names(args.names_file)
    df = nodes_df.merge(names_df, on='tax_id')
    df = df[['tax_id', 'parent_tax_id', 'rank', 'name_txt']]
    df.reset_index(drop=True, inplace=True)
    logging.info('# of tax ids: {0}'.format(df.shape[0]))
    # log summary info about the dataframe
    print('=' * 50)
    df.info(verbose=True, memory_usage="deep")
    print('=' * 50)

    # force to use global variable TAXONOMY_DICT because map doesn't allow
    # passing extra args easily

    # TAXONOMY_DICT: a dict with tax_id as the key and each record as a value.
    # example tuple items:
    # (1,
    #  {'parent_tax_id': 1, 'name_txt': 'root',
    #   'rank': 'no rank', 'tax_id': 1}
    # )

    # (16,
    #  {'parent_tax_id': 32011, 'name_txt': 'Methylophilus',
    #   'rank': 'genus', 'tax_id': 16}
    # )

    global TAXONOMY_DICT
    logging.info('generating TAXONOMY_DICT...')
    TAXONOMY_DICT = dict(zip(df.tax_id.values, df.to_dict('records')))

    ncpus = multiprocessing.cpu_count()
    logging.info('found {0} cpus, and will use all of them to find lineages '
                 'for all tax ids'.format(ncpus))
    pool = multiprocessing.Pool(ncpus)
    # take about 18G memory
    lineages_dd = pool.map(find_lineage, df.tax_id.values)
    pool.close()

    logging.info('generating a dictionary of lineages information...')
    dd_for_df = dict(zip(range(len(lineages_dd)), lineages_dd))

    logging.info('generating lineages_df...')
    lineages_df = pd.DataFrame.from_dict(dd_for_df, orient='index')
    lineages_df.sort_values('tax_id', inplace=True)
    # # alternatively, but less useful, sort by ranks
    # lineages_df.sort_values(['superkingdom',
    #                          'phylum',
    #                          'class',
    #                          'order',
    #                          'family',
    #                          'genus',
    #                          'species'], inplace=True)

    lineages_csv_output = '{0}.csv.gz'.format(args.output_prefix)
    backup_file(lineages_csv_output)
    logging.info("writing lineages to {0}".format(lineages_csv_output))
    with open(lineages_csv_output, 'wb') as opf:
        # make sure the name and timestamp are not gzipped, (like gzip -n)
        opf_gz = gzip.GzipFile(
            filename='',  # empty string because fileobj is given
            mode='wb',  # wb doesn't seem to work sometimes
            compresslevel=9,
            fileobj=opf,
            mtime=0.  # an optional numeric timestamp, set to be deterministic
        )
        cols = [
            'tax_id', 'superkingdom', 'phylum', 'class', 'order', 'family',
            'genus', 'species'
        ]

        other_cols = sorted(
            [__ for __ in lineages_df.columns if __ not in cols])
        output_cols = cols + other_cols
        lineages_df.to_csv(opf_gz, index=False, columns=output_cols)
        opf_gz.close()
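The gzip handling in Example #12 (filename='' and mtime=0) is what makes the compressed CSV reproducible: by default gzip embeds the source file name and a timestamp in the header, so repeated runs would differ byte for byte. A small self-contained check of that property:

import gzip
import io

def gzip_deterministic(data, compresslevel=9):
    buf = io.BytesIO()
    gz = gzip.GzipFile(filename='', mode='wb',
                       compresslevel=compresslevel, fileobj=buf, mtime=0)
    gz.write(data)
    gz.close()
    return buf.getvalue()

# Identical input always yields identical bytes.
assert gzip_deterministic(b'tax_id,name\n1,root\n') == \
       gzip_deterministic(b'tax_id,name\n1,root\n')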