def copy_or_move_files_to_folder(startingFolderPath, destinationFolderPath,
                                 copy_or_move):
    """Copy or move every file found under startingFolderPath (recursively)
    into destinationFolderPath, logging each transfer.

    :param startingFolderPath: source folder whose tree is walked.
    :param destinationFolderPath: folder that receives the files (the tree
        is flattened: all files land directly in this folder).
    :param copy_or_move: 'COPY' to copy, 'MOVE' to move; any other value
        only logs the file pair without transferring it.
    """
    print('')

    for root, dirs, files in os.walk(
            startingFolderPath):  # compile list of all files in source folder

        for fileName in files:  # iterate through all files in source folder
            # BUG FIX: join against the *current* walk root (not the
            # top-level folder) so files inside subdirectories resolve,
            # and use os.path.join instead of raw string concatenation,
            # which silently produced bad paths without a trailing slash.
            FromFilePath = os.path.join(root, fileName)
            ToFilePath = os.path.join(destinationFolderPath, fileName)

            if copy_or_move == 'COPY':
                copy_file(FromFilePath, ToFilePath)
                print('COPYING...')

            elif copy_or_move == 'MOVE':
                move_file(FromFilePath, ToFilePath)
                print('MOVING...')

            print('From: ' + FromFilePath)
            print('To: ' + ToFilePath)
            print('')

            loggerMessage = copy_or_move + ' FILE: ' + FromFilePath + ' --> ' + ToFilePath
            custom_logger(loggerMessage)
def extract_input_files(dest_dir: Path,
                        tgz_file: Path,
                        force_extract: bool = False):
    """Extract the Lib/ tree of a CPython source .tgz into *dest_dir*.

    Every .py file under the archive's Python-x.x.x/Lib/ directory that
    passes the ignore-pattern checks is moved into *dest_dir* with its
    relative path flattened ('/' replaced by '_').

    :param dest_dir: destination directory for the flattened .py files.
    :param tgz_file: the Python-x.x.x.tgz source archive to read.
    :param force_extract: proceed even if *dest_dir* already has .py files.
    :raises RuntimeError: if *dest_dir* already contains .py files (without
        *force_extract*) or *tgz_file* does not exist.
    """
    dest_dir = dest_dir.absolute()
    # Ensure the target directory is empty of .py files, or that --force-extract has been given.
    if dest_dir.is_dir() and list(dest_dir.glob('*.py')) and not force_extract:
        raise RuntimeError(
            f".py files found in: {dest_dir}. Remove them or use --force-extract to continue anyway."
        )
    # Ensure the .tgz file to extract from exists.
    if not tgz_file.exists():
        raise RuntimeError(f".tgz file does not exist: {tgz_file}")
    # Create a temporary directory, promising to return to the CWD after we're done.
    with temp_dir_and_return_to_orig_dir() as temp_dir:
        # Identify the Python-x.x.x/Lib/ directory.
        # (Archive basename minus '.tgz' is assumed to be the top-level dir.)
        internal_lib_dir = splitext(basename(tgz_file))[0] + '/Lib/'
        # Extract the contents of the .tgz's Python-x.x.x/Lib/ directory to the temp directory.
        # --strip-components=2 drops the leading Python-x.x.x/Lib/ prefix.
        run([
            'tar', '--strip-components=2', '-xvzf',
            str(tgz_file), '-C', temp_dir, internal_lib_dir
        ],
            capture_output=True)
        # Change CWD to temp directory.
        chdir(temp_dir)
        # Collect all of the .py files that do not fail the containment checks.
        # Simultaneously, determine the destination filename for these files.
        pairs = ((path, dest_dir / str(path).replace('/', '_'))
                 for path in Path('.').glob('**/*.py') if (not any(
                     str(path).startswith(pat)
                     for pat in IGNORE_START_PATTERNS) and not any(
                         pat in str(path)
                         for pat in IGNORE_CONTAINS_PATTERNS)))
        # Move all of the files from the temporary directory to the destination directory.
        for orig, dest in pairs:
            move_file(orig, dest)
Esempio n. 3
0
def do_sync(src, dst, overwrite=False, move=False, quiet=False):
    """Copy all files from src to dst.

    If overwrite is True, then existing files and directories in the
    destination directory will be replaced if there is a file or
    directory in the source directory with the same name.

    If move is True, then files will be moved instead of copied. This
    is a useful optimization if the source directory will be deleted
    afterward anyway.

    If quiet is True, per-file progress messages are suppressed."""
    filenames = os.listdir(src)
    # Clear (or verify absence of) conflicting destination entries first.
    ensure_nonexistent(dst, filenames, delete=overwrite)
    for f in filenames:
        srcfile = os.path.join(src, f)
        dstfile = os.path.join(dst, f)
        if move:
            if not quiet:
                # BUG FIX: these were Python 2 print *statements*, which are
                # syntax errors in Python 3 (the rest of this file uses
                # f-strings, i.e. Python 3).
                print("Move %s to %s" % (f, dst))
            move_file(srcfile, dstfile)
        elif os.path.isdir(srcfile):
            if not quiet:
                print("Copy dir %s to %s" % (f, dst))
            copy_tree(srcfile, dstfile, symlinks=True)
        else:
            if not quiet:
                print("Copy %s to %s" % (f, dst))
            copy_file(srcfile, dstfile)
    def send_files(self):
        """Prepare the downloaded track(s) for delivery.

        Zips multiple files (or takes the single file as-is), moves the
        result into STORAGE_FOLDER, and returns its path under ./transfer/.
        Renders an error template when the download directory is empty.
        """
        path, dirs, files = next(walk(self.temp_download_dir))
        file_count = len(files)
        self.final_file_name = str()

        # The link is invalid
        if file_count == 0:
            return render_template("error_template.html")

        # We have more than one file, so let's zip them up and send them back.
        if file_count > 1:
            # Timestamp-derived name (dots stripped) avoids collisions
            # between concurrent requests.
            self.final_file_name = "tracks_" + str(datetime.now().timestamp()).replace('.', '')
            self.final_file_location = FilePath("/tmp/")  # noqa: S108
            make_archive(self.final_file_location / self.final_file_name, 'zip', self.temp_download_dir)
            self.final_file_location /= (self.final_file_name + ".zip")
            self.mime_type = "application/zip"
            move_file(str(self.final_file_location), STORAGE_FOLDER)
            self.final_file_name += ".zip"

        # We only have one track, so let's send the file back.
        else:
            self.final_file_name = next(walk(self.temp_download_dir))[2][0]
            move_file(safe_join(self.temp_download_dir, self.final_file_name), STORAGE_FOLDER)

        return safe_join("./transfer/", self.final_file_name)
Esempio n. 5
0
    def download(self, year, force=False):
        """Fetch the data file for *year*, unless a local copy exists.

        :param year: year whose file should be downloaded.
        :param force: when True, download even if the file is present.
        :return: dict with keys ``downloaded`` (bool) and ``filename``.
        """
        target = self.filename(year)
        if not force and target.exists():
            # Reuse the previously downloaded file.
            return {"downloaded": False, "filename": target}

        fetched = download_file(self.url(year), progress=True)
        move_file(fetched.uri, target)
        return {"downloaded": True, "filename": target}
 def common_file(lst, src_path, asset):
     '''Processing for common sets of files.

     Moves each file in *lst* from *src_path* into the site's
     assets/<asset>/ directory. If the destination already exists, the
     source copy is deleted instead of moved.

     NOTE(review): references ``self`` without taking it as a parameter --
     presumably a helper nested inside a method; confirm enclosing scope.
     '''
     for fname in lst:
         full_in = os.path.join(src_path, fname)
         full_out = os.path.join(self.sitepath, 'assets', asset, fname)
         if os.path.exists(full_in):
             if os.path.exists(full_out):
                 # Destination already present: drop the duplicate source.
                 os.remove(full_in)
             else:
                 move_file(full_in, full_out)
Esempio n. 7
0
 def moveVideo(self, video_path, video_name, video_actor):
     """Move *video_name* from *video_path* into a subfolder named after
     *video_actor*, creating the folder if it does not yet exist.
     """
     new_dir = os.path.join(video_path, video_actor)
     # Create the actor folder if needed. The original duplicated the
     # move_file call in both the try and the except branch; ignoring
     # FileExistsError and moving once is equivalent and DRY.
     try:
         os.mkdir(new_dir)
     except FileExistsError:
         pass
     move_file(os.path.join(video_path, video_name),
               os.path.join(new_dir, video_name))
     print('{}\tMOVE\tOK'.format(video_name))
Esempio n. 8
0
def operationalVideo(video_path, video_name, video_actor):
    """Move *video_name* from *video_path* into a subfolder named after
    *video_actor*, creating the folder if it does not yet exist.
    """
    new_dir = os.path.join(video_path, video_actor)
    # Create the actor folder if needed. The original duplicated the
    # move_file call in both the try and the except branch; ignoring
    # FileExistsError and moving once is equivalent and DRY.
    try:
        os.mkdir(new_dir)
    except FileExistsError:
        pass
    move_file(os.path.join(video_path, video_name),
              os.path.join(new_dir, video_name))
    print('\t{}\tOK...'.format(video_name))
 def common_file(lst, src_path, asset):
     '''Processing for common sets of files.

     Moves each file in *lst* from *src_path* into the site's
     assets/<asset>/ directory; an already-present destination causes the
     source copy to be deleted instead of moved.

     NOTE(review): references ``self`` without taking it as a parameter --
     presumably a helper nested inside a method; confirm enclosing scope.
     '''
     for fname in lst:
         full_in = os.path.join(src_path, fname)
         full_out = os.path.join(self.sitepath, 'assets', asset, fname)
         if os.path.exists(full_in):
             if os.path.exists(full_out):
                 # Destination already present: drop the duplicate source.
                 os.remove(full_in)
             else:
                 move_file(full_in, full_out)
Esempio n. 10
0
 def mvfile(self, src, dst):
     """
     Move a file from one place to another.

     Thin wrapper that delegates to the module-level move_file helper.

     :param src: The source file
     :type  src: str
     :param dst: The destination file
     :type  dst: str
     """
     move_file(src, dst)
Esempio n. 11
0
def save_specimen(collection, key, skeleton_text):
    """Persist a captured specimen.

    Promotes the temp HTML capture into *collection*, writes the gzipped
    skeleton text alongside it, and discards the leftover temp .txt file.
    """
    html_dest = file_path(collection, key, 'html.gz')
    # Promote the temporary HTML capture to its permanent location.
    move_file(file_path('temp', key, 'html.gz'), html_dest)
    # Store the skeleton text gzip-compressed next to the HTML.
    skel_dest = file_path(collection, key, 'skel.gz')
    with gzip.open(skel_dest, 'wb') as out:
        out.write(skeleton_text.encode('UTF-8'))
    # The raw temp text file is no longer needed.
    unlink(file_path('temp', key, 'txt'))
    print("Saved %s" % html_dest)
Esempio n. 12
0
    def discardImage(self, *argv):
        """Move the currently displayed image into ./Images/_trash and
        reload the viewer, keeping the cursor position clamped to the
        (possibly shrunken) image count.

        NOTE(review): the filename is taken after the last backslash, which
        assumes Windows-style paths in currentimage['text'] -- confirm.
        """
        if not os.path.exists("./Images/_trash"):
            os.mkdir("./Images/_trash")
        im_name = self.currentimage['text'].split('\\')[-1]

        move_file(self.currentimage['text'],
                  os.path.join("./Images/_trash", im_name))

        # Reload the directory listing, restoring the cursor position.
        cur_img = self.cur
        self.loadDir()
        self.cur = cur_img
        if self.cur > self.total:
            self.cur = self.total
        self.loadImage()
Esempio n. 13
0
def process_file(file: Path, backup_dict: dict, backup_dir: Path,
                 counts: dict):
    """Processes the given file: dedupe by MD5 against backup_dict, or move
    it into the backup tree via the extension-specific processor.

    :param file: candidate media file.
    :param backup_dict: maps MD5 hex digest -> already-backed-up target path;
        updated in place when a file is moved.
    :param backup_dir: root directory of the backup tree.
    :param counts: mutable counters; 'removed' and 'moved' are incremented.
    """
    print(f"Processing file: {file}...    ", end="")

    # Dispatch table: upper-cased extension -> processor that computes the
    # destination path inside backup_dir.
    processors = {
        ".JPG": process_image,
        ".JPEG": process_image,
        ".AVI": process_video,
        ".MOV": process_video,
        ".MP4": process_video,
        ".MPG": process_video,
        ".MTS": process_video,
        ".M2TS": process_video,
    }
    extension = file.suffix.upper()
    if extension not in processors:
        print(f"\tUnsupported extension: {extension} skipping")
        return

    if not file.is_file():
        print(f"Not a file. Skipping")
        return

    # Read via a context manager so the handle is closed promptly
    # (the original leaked the open file object).
    with file.open("rb") as fh:
        checksum = hashlib.md5(fh.read()).hexdigest()

    existing_target_file = backup_dict.get(checksum, None)
    if existing_target_file is not None:
        # BUG FIX: is_file was referenced without calling it, so the bound
        # method was always truthy; call it to actually check existence.
        if Path(existing_target_file).is_file():
            print(
                f"\n\tChecksum found in backup_dict: {existing_target_file}. Removing file."
            )
            counts['removed'] += 1
        try:
            file.unlink()
        except Exception:
            print(f"\tCould not remove the file: {file}")
        return
    else:
        try:
            target_file = processors[extension](file, backup_dir)
            print(f'\n\tMoving to file {target_file}')
            move_file(file, target_file)
            backup_dict[checksum] = target_file
            counts['moved'] += 1
        except Exception as e:
            print(f'Got exception: {e}')
    print('Done')
    def move_after_download(self):
        """
        Moves the downloaded file to its destined filepath.

        No-op when the current working directory already is the target
        filepath. Failures are reported to stdout (best-effort) rather
        than raised to the caller.
        """
        if getcwd() == self.filepath:
            return None

        try:
            # The file was downloaded into the CWD as "<title><format>".
            original_path = join_path(getcwd(), self.video_title + self.format)

            # * moving the file to self.filepath
            move_file(original_path, self.filepath)
            print("File moved to", self.filepath)

        except Exception as e:
            # Deliberate best-effort: report and continue.
            print(e)
Esempio n. 15
0
def worker_thread():
    """Execute the queued update commands in ``tlist``, logging progress
    into the Tk text widget ``T``.

    Each entry is a comma-delimited command string:
    ``download,<url>`` / ``movefile,<src>,<dst>`` / ``mkfolder,<path>`` /
    ``extract,<zipfile>``. Per-command errors are shown in red and the loop
    continues; a crash of the loop itself reports '#Failed'.
    """
    global T, tlist
    cnt = 0  # NOTE(review): never used -- leftover counter?
    try:
        for x in tlist:
            try:
                T.config(yscrollcommand=S.set, state="normal")
                if x[:8] == 'download':
                    T.insert(END, 'Downloading ' + x[len('download,'):] + '\n',
                             'blackcol')
                    download(x[len('download,'):])
                elif x[:8] == 'movefile':
                    # Slice "movefile,<src>,<dst>" around the comma after src.
                    b = x[9:].find(',')
                    source = x[9:b + 9]
                    target = x[b + 10:]
                    templist = [[source], [target]]  # NOTE(review): unused
                    T.insert(END, 'Moving ' + source + ' to ' + target + '\n',
                             'blackcol')
                    move_file(source, target)
                elif x[:8] == 'mkfolder':
                    try:
                        os.stat(x[9:])
                    except:
                        # stat failed -> the folder does not exist; create it.
                        os.mkdir(x[9:])
                        T.insert(END, 'Making folder - ' + x[9:] + '\n',
                                 'blackcol')
                elif x[:7] == 'extract':
                    T.insert(END, 'Extracting ' + x[8:] + '\n', 'blackcol')
                    zifi = zipfile.ZipFile(x[8:])
                    zifi.extractall()
            except Exception as e:
                e = str(e)
                T.insert(END, e + '\n', 'redcol')

        # All commands processed: clear the persisted task list.
        savef('', 'load/upd_filelist')
        T.config(yscrollcommand=S.set, state="normal")
        T.insert(END, 'Erased file list\n', 'blackcol')
        T.insert(END, '#Done\n', 'greencol')
        T.config(yscrollcommand=S.set, state="disabled")
    except Exception as e:
        e = str(e)
        T.config(yscrollcommand=S.set, state="normal")
        T.insert(END, 'Update failed, worker thread has crashed\n', 'redcol')
        T.insert(END, e + '\n', 'redcol')
        T.insert(END, '#Failed\n', 'failed')
        T.config(yscrollcommand=S.set, state="disabled")
Esempio n. 16
0
def worker_thread():
    """Execute the queued update commands in ``tlist``, logging progress
    into the Tk text widget ``T``.

    Unformatted duplicate of the worker above; the command strings
    (download / movefile / mkfolder / extract) and behaviour are identical.
    """
    global T, tlist
    cnt = 0  # NOTE(review): never used -- leftover counter?
    try:
        for x in tlist:
            try:
                T.config(yscrollcommand=S.set,state="normal")
                if x[:8] == 'download':
                    T.insert(END, 'Downloading '+x[len('download,'):]+'\n','blackcol')
                    download(x[len('download,'):])
                elif x[:8] == 'movefile':
                    # Slice "movefile,<src>,<dst>" around the comma after src.
                    b = x[9:].find(',')
                    source = x[9:b+9]
                    target = x[b+10:]
                    templist = [[source],[target]]  # NOTE(review): unused
                    T.insert(END, 'Moving '+source+' to '+target+'\n','blackcol')
                    move_file(source,target)
                elif x[:8] == 'mkfolder':
                    try:
                        os.stat(x[9:])
                    except:
                        # stat failed -> the folder does not exist; create it.
                        os.mkdir(x[9:])
                        T.insert(END, 'Making folder - '+x[9:]+'\n','blackcol')
                elif x[:7] == 'extract':
                    T.insert(END, 'Extracting '+x[8:]+'\n','blackcol')
                    zifi = zipfile.ZipFile(x[8:])
                    zifi.extractall()
            except Exception as e:
                e = str(e)
                T.insert(END, e+'\n','redcol')

        # All commands processed: clear the persisted task list.
        savef('','load/upd_filelist')
        T.config(yscrollcommand=S.set,state="normal")
        T.insert(END, 'Erased file list\n','blackcol')
        T.insert(END, '#Done\n','greencol')
        T.config(yscrollcommand=S.set,state="disabled")
    except Exception as e:
        e = str(e)
        T.config(yscrollcommand=S.set,state="normal")
        T.insert(END, 'Update failed, worker thread has crashed\n','redcol')
        T.insert(END, e+'\n','redcol')
        T.insert(END, '#Failed\n','failed')
        T.config(yscrollcommand=S.set,state="disabled")
Esempio n. 17
0
    def load_manifest(self, current_version):
        """Back up the current data file and executable, then download and
        apply every manifest newer than *current_version*.

        Errors are reported to stdout, written to UPDATE_ERRORS.txt, and
        cause the updater to close.
        """
        try:
            # Preserve current program files before touching anything.
            copy_file("data.pls", "save/data.pls")
            move_file("PlayListStore4.exe", "save/PlayListStore4.exe")

            versions = download(VersionsPath).text.split('=')
            if versions[-1] != current_version:
                if current_version == versions[-2]:
                    # Exactly one version behind: apply the newest manifest.
                    manifest = download(ManifestPath +
                                        "%s.txt" % versions[-1]).text
                    self.update(manifest)
                else:
                    # Several versions behind: apply each manifest in order.
                    for version in versions[versions.index(current_version) +
                                            1:]:
                        # BUG FIX: the single-step branch passes .text, but
                        # this branch previously handed self.update() the raw
                        # response object.
                        manifest = download(ManifestPath +
                                            "%s.txt" % version).text
                        self.update(manifest)
        except Exception as e:
            print(e)
            # Context manager ensures the error log is flushed and closed
            # (the original leaked the file handle).
            with open("UPDATE_ERRORS.txt", 'w') as error_log:
                error_log.write(str(e))
            self.close()
def generate_graphs_pdf_file(graphs_dir: Path,
                             out_dir: Path,
                             overwrite: bool = False,
                             recursive_calls_file: Optional[Path] = None,
                             collated_results_file: Optional[Path] = None,
                             calculated_results_file: Optional[Path] = None,
                             results_pdf_file: Optional[Path] = None):
    """Write the graphs LaTeX file into *graphs_dir*, compile it with
    lualatex, and move the resulting PDF to *results_pdf_file* (default:
    *out_dir* with the same base name as the .tex file).

    :raises RuntimeError: if the .tex file exists and *overwrite* is False.
    """
    graphs_tex_file = graphs_dir / GRAPHS_TEX_FILE
    graphs_pdf_file = graphs_tex_file.with_suffix('.pdf')
    if not overwrite and graphs_tex_file.is_file():
        raise RuntimeError(
            f"Output file {graphs_tex_file} already exists. Aborting!")
    if recursive_calls_file is None:
        recursive_calls_file = graphs_dir / DEFAULT_RECURSIVE_CALLS_FILE
    if collated_results_file is None:
        collated_results_file = graphs_dir / DEFAULT_COLLATED_RESULTS_FILE
    if calculated_results_file is None:
        # NOTE(review): this default reuses DEFAULT_RECURSIVE_CALLS_FILE; the
        # parallel defaults above suggest a DEFAULT_CALCULATED_RESULTS_FILE
        # constant was intended -- confirm before relying on this default.
        calculated_results_file = graphs_dir / DEFAULT_RECURSIVE_CALLS_FILE
    if results_pdf_file is None:
        results_pdf_file = out_dir / graphs_pdf_file.name
    print(f"Generating LaTeX file for graphs at {graphs_tex_file}...")
    # Short display paths are taken relative to three directory levels up.
    GRAPHS_FILE_TEXT = GRAPHS_FILE_CONTENTS.format(
        recursive_calls_short=str(
            recursive_calls_file.relative_to(
                recursive_calls_file.parent.parent.parent)),
        collated_results_short=str(
            collated_results_file.relative_to(
                collated_results_file.parent.parent.parent)),
        recursive_calls=str(recursive_calls_file),
        collated_results=str(collated_results_file),
        calculated_dir=str(calculated_results_file.parent),
        calculated=calculated_results_file.name)
    graphs_tex_file.write_text(GRAPHS_FILE_TEXT)
    print(f"Generating PDF of graphs at {results_pdf_file}...")
    # Compile from inside graphs_dir so relative inputs resolve, then restore
    # the previous working directory.
    prev_dir = getcwd()
    chdir(graphs_dir)
    run(['lualatex', graphs_tex_file])
    chdir(prev_dir)
    move_file(graphs_pdf_file, results_pdf_file)
Esempio n. 19
0
def main():
    """Scrape Twitch stream data for each configured game in an endless
    loop, appending rows to a per-day CSV per game and archiving the
    finished CSV into data/<shorthand>/ when the date rolls over.
    """
    client_id = get_twitch_client_id()
    current_date_string = get_current_date_string()
    while True:
        # Scrape the data for each game
        for game_configuration in game_configurations:
            # CSV this game writes to for the (still-current) date. BUG FIX:
            # previously this was only assigned at the END of the loop body,
            # so on date rollover the move below archived the *previous*
            # game's file -- or raised NameError on the very first iteration.
            file_name = game_configuration['shorthand'] + '_' + current_date_string + '.csv'
            # if a new day has started, move the completed data to its respective subfolder
            new_date_string = get_current_date_string()
            if not current_date_string == new_date_string:
                # NOTE(review): only the game that detects the rollover gets
                # archived here; other games' previous-day files stay in
                # place -- confirm whether that is intended.
                data_folder = os.path.join(os.getcwd(), 'data', game_configuration['shorthand'], file_name)
                print('Moving {} to: {}'.format(file_name, data_folder))
                move_file(src=file_name, dst=data_folder)
                current_date_string = new_date_string
            print('Scraping data for: {}'.format(game_configuration['full_name'][0]))
            # Get the data for the current game by invoking the twitchapi module
            api = twitchapi.APIStreamsRequest(
                game_url_name=game_configuration['url_name'],
                game_full_names=game_configuration['full_name'],
                client_id=client_id)
            try:
                api.request_all_game_data()
            except Exception as e:
                print(e)
                time.sleep(5)
                # move onto the next game
                continue
            returned_data = api.return_required_data()
            # if any returned data is available, then write to to the CSV
            # (recomputed in case the date string was just updated above)
            file_name = game_configuration['shorthand'] + '_' + current_date_string + '.csv'
            if returned_data is not None and len(returned_data) > 0:
                write_to_file(
                    file_name=file_name,
                    rows=returned_data)
            else:
                print('No rows written for: {}'.format(game_configuration['full_name']))

        pause(cycle_delay)
Esempio n. 20
0
    def install(self):
        """Move the bundled F_Reference_H program into the user-chosen
        folder and optionally create Desktop / Start Menu shortcuts via
        PowerShell (Windows-only).

        Validation failures are signalled internally by raising NameError
        with a marker message, then reported to the user via message boxes.
        """
        url = path.join(self.url)
        try:
            if url == '':
                raise NameError('is empty')
            elif ('\\' not in self.url) and ('/' not in self.url):
                # A path must contain a separator; a bare name is rejected.
                raise NameError('not file')
            move_file('bin/dist/F_Reference_H', url)
            if self.check_icon.get():
                system(
                    f'@powershell \"$x=(New-Object -ComObject '
                    f'WScript.Shell).CreateShortcut('
                    f'\'%USERPROFILE%/Desktop/F_Reference_H.lnk\');$x'
                    f'.TargetPath=\''
                    f'{url}/F_Reference_H/F_Reference_H.exe\';$x'
                    f'.WorkingDirectory=\''
                    f'{url}/F_Reference_H\';$x.Save()\" '
                )
                system(
                    f'@powershell \"$x=(New-Object -ComObject '
                    f'WScript.Shell).CreateShortcut('
                    f'\'%APPDATA%\Microsoft\Windows\Start '
                    f'Menu\Programs\F_Reference_H.lnk\');$x'
                    f'.TargetPath=\''
                    f'{url}/F_Reference_H/F_Reference_H.exe\';$x'
                    f'.WorkingDirectory=\'{url}/F_Reference_H\';$x.Save()\"')
                showinfo('Successfully', 'Установка прошла успешно!')
                exit_ex()

        except NameError as error:
            if str(error) == 'is empty':
                showerror('Error', 'Пустое поле пути к папке!')
            if str(error) == 'not file':
                showerror(
                    'Error',
                    'Мы заметили, что вы выбрали неверный адрес!'
                )
Esempio n. 21
0
 def move_from_location(self, moved, destination):
     """Move *moved* to *destination* (delegates to move_file)."""
     move_file(moved, destination)
Esempio n. 22
0
 def backup_project_dataframe(self):
     """Rename the project's root dataframe to a timestamped backup.

     Moves <root_dir>/dataframes/root.dfr to
     <root_dir>/dataframes/root_bak_<epoch>.dfr; the original file is
     renamed away, not copied.
     """
     move_file(os.path.join(self.root_dir, 'dataframes', 'root.dfr'), os.path.join(self.root_dir, 'dataframes', f'root_bak_{time()}.dfr'))
Esempio n. 23
0
def main():
    """Rectify the time index of a directory of raw E3SM time-slice files.

    Collects contiguous time segments from the input directory, truncates
    the file of a preceding segment that overlaps the start of the next
    one, and fills the output directory with symlinks (default), copies
    (--copy), or moves (--move) of the input files.

    Returns 0 on success, 1 on argument/output-directory errors; calls
    sys.exit(1) when --no-gaps is set and a time gap is found.
    """
    desc = """This tool will search through a directory full of raw E3SM model time-slice output files, and find/fix any issues with the time index.
    If overlapping time segments are found, it will find the last file of the preceding segment and truncate it to match the index from the first file from the
    second segment."""
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        "input",
        help="The directory to check for time index issues, should only contain a single time-frequency from a single case"
    )
    parser.add_argument(
        "--output",
        default="output",
        required=False,
        help=f"output directory for rectified dataset, default is {os.environ['PWD']}/output"
    )
    parser.add_argument(
        "--move",
        action="store_true",
        required=False,
        help="move the files from the input directory into the output directory instead of symlinks"
    )
    parser.add_argument(
        "--copy",
        action="store_true",
        required=False,
        help="copy the files from the input directory into the output directory instead of symlinks"
    )
    parser.add_argument(
        "-j",
        "--jobs",
        default=8,
        type=int,
        help="the number of processes, default is 8"
    )
    parser.add_argument(
        "--dryrun",
        action="store_true",
        help="Collect the time segments, but dont produce the truncated files or move anything"
    )
    parser.add_argument(
        "--no-gaps", action="store_true", help="Exit if a time gap is discovered"
    )
    parser.add_argument(
        "-q", "--quiet", action="store_true", help="Suppress progress bars"
    )

    args = parser.parse_args()
    inpath = args.input
    outpath = args.output
    num_jobs = args.jobs
    dryrun = args.dryrun
    quiet = args.quiet

    if args.copy and args.move:
        con_message("error", "Both copy and move flags are set, please only pick one")
        return 1

    if os.path.exists(outpath) and len(os.listdir(outpath)):
        con_message(
            "error", f"Output directory {outpath} already exists and contains files")
        return 1
    else:
        os.makedirs(outpath, exist_ok=True)

    # Detect the time / time-bounds variable names from the first input file.
    timename, bndsname = get_time_names(next(Path(inpath).glob("*")).as_posix())

    segments = collect_segments(inpath, num_jobs, timename, bndsname)

    # Single segment: no overlaps, just place every file and finish.
    if len(segments) == 1:
        con_message("info", "No overlapping segments found")
        if dryrun:
            con_message("info", "not moving files")
        else:
            desc = "Placing files into output directory"
            _, files = segments.popitem()
            for src in tqdm(files, desc=desc, disable=quiet):
                _, name = os.path.split(src)
                dst = os.path.join(outpath, name)
                if os.path.exists(dst):
                    continue
                if args.move:
                    move_file(src, dst)
                elif args.copy:
                    copyfile(src, dst)
                else:
                    os.symlink(src, dst)
        return 0

    # Multiple segments: order them chronologically by start time.
    ordered_segments = []
    for start, end in segments.keys():
        ordered_segments.append(
            {"start": start, "end": end, "files": segments[(start, end)]}
        )

    ordered_segments.sort(key=lambda i: i["start"])

    # Walk consecutive segment pairs, handling gaps and overlaps.
    for s1, s2 in zip(ordered_segments[:-1], ordered_segments[1:]):
        if s2["start"] > s1["end"]:
            # Gap between segments: nothing to truncate; place s1's files
            # (and s2's, when s2 is the final segment) unless --no-gaps.
            msg = f"There's a time gap between the end of {os.path.basename(s1['files'][-1])} and the start of {os.path.basename(s2['files'][0])} of {s2['start'] - s1['end']} "
            if args.no_gaps == True:
                outpath = Path(outpath)
                if not any(outpath.iterdir()):
                    outpath.rmdir()
                con_message("error", msg)
                sys.exit(1)
            con_message("warning", msg)
            if not args.dryrun:
                con_message("info", "Moving files from the previous segment")
                desc = "Placing files into output directory"
                for src in tqdm(s1["files"], desc=desc, disable=quiet):
                    _, name = os.path.split(src)
                    dst = os.path.join(outpath, name)
                    if os.path.exists(dst):
                        continue
                    if args.move:
                        move_file(src, dst)
                    elif args.copy:
                        copyfile(src, dst)
                    else:
                        os.symlink(src, dst)
                if ordered_segments.index(s2) == len(ordered_segments) - 1:
                    con_message("info", "Moving files from the last segment")
                    desc = "Placing files into output directory"
                    for src in tqdm(s2["files"], desc=desc, disable=quiet):
                        _, name = os.path.split(src)
                        dst = os.path.join(outpath, name)
                        if os.path.exists(dst):
                            continue
                        if args.move:
                            move_file(src, dst)
                        elif args.copy:
                            copyfile(src, dst)
                        else:
                            os.symlink(src, dst)
            continue

        to_truncate = None  # the file that needs to be truncated
        # the index in the file list of segment 1
        truncate_index = len(s1["files"])
        # Walk s1's files backwards until we find the first file that ends
        # at or before the start of s2 -- everything after it overlaps.
        for file in tqdm(s1["files"][::-1], disable=quiet, desc="Stepping backwards to find truncation point"):
            with xr.open_dataset(file, decode_times=False) as ds:
                if ds[bndsname][-1].values[1] > s2["start"]:
                    truncate_index -= 1
                    continue
                else:
                    break

        con_message(
            "info",
            f"removing {len(s1['files']) - truncate_index} files from ({s1['start']}, {s1['end']})",
        )

        # Rebuild the boundary file with only the time steps that precede
        # the start of s2.
        new_ds = xr.Dataset()
        to_truncate = s1["files"][truncate_index]
        with xr.open_dataset(to_truncate, decode_times=False) as ds:
            target_index = 0
            for i in range(0, len(ds[bndsname])):
                if ds[bndsname][i].values[1] == s2["start"]:
                    target_index += 1
                    break
                target_index += 1

            con_message(
                "info",
                f"truncating {to_truncate} by removing {len(ds[bndsname]) - target_index} time steps",
            )

            new_ds.attrs = ds.attrs
            for variable in ds.data_vars:
                # Variables without a time coordinate are copied unchanged.
                if "time" not in ds[variable].coords and timename != "Time":
                    new_ds[variable] = ds[variable]
                    new_ds[variable].attrs = ds[variable].attrs
                    continue
                if timename == "time":
                    new_ds[variable] = ds[variable].isel(time=slice(0, target_index))
                    new_ds[variable].attrs = ds[variable].attrs
                else:
                    new_ds[variable] = ds[variable].isel(Time=slice(0, target_index))
                    new_ds[variable].attrs = ds[variable].attrs
                ds[variable].encoding['_FillValue'] = False

        _, to_truncate_name = os.path.split(to_truncate)
        outfile_path = os.path.join(outpath, f"{to_truncate_name[:-3]}.trunc.nc")

        if dryrun:
            con_message("info", f"dryrun, not writing out file {outfile_path}")
        else:
            con_message("info", f"writing out {outfile_path}")
            new_ds.to_netcdf(outfile_path, unlimited_dims=[timename])

        if dryrun:
            con_message("info", "dryrun, not moving files")
        else:
            # Place the non-overlapping prefix of s1 into the output.
            desc = "Placing files into output directory"
            con_message("info", f"Moving the first {truncate_index} files")
            for src in tqdm(s1["files"][:truncate_index], desc=desc, disable=quiet):
                _, name = os.path.split(src)
                dst = os.path.join(outpath, name)
                if os.path.exists(dst):
                    continue
                if args.move:
                    move_file(src, dst)
                elif args.copy:
                    copyfile(src, dst)
                else:
                    os.symlink(src, dst)
    # The final segment's files are always placed last.
    if dryrun:
        con_message("info", "dryrun, not moving files")
    else:
        con_message("info", "Moving files from the last segment")
        desc = "Placing files into output directory"
        for src in tqdm(ordered_segments[-1]["files"], desc=desc, disable=quiet):
            _, name = os.path.split(src)
            dst = os.path.join(outpath, name)
            if os.path.exists(dst):
                continue
            if args.move:
                move_file(src, dst)
            elif args.copy:
                copyfile(src, dst)
            else:
                os.symlink(src, dst)

    return 0
Esempio n. 24
0
def _place_files(files, outpath, args, desc="Placing files into output directory"):
    """Transfer each of *files* into *outpath* according to the CLI flags.

    Mode is --move, --copy, or (default) a symlink back to the source.
    Shared by every "place files into output" loop in main().
    """
    for src in tqdm(files, desc=desc):
        _, name = os.path.split(src)
        dst = os.path.join(outpath, name)
        if args.move:
            move_file(src, dst)
        elif args.copy:
            copyfile(src, dst)
        else:
            os.symlink(src, dst)


def main():
    """Find and fix time-index issues in a directory of E3SM time-slice output.

    Collects contiguous time segments, reports gaps, truncates the file that
    straddles the start of an overlapping segment, and places the surviving
    files into the output directory. Returns 0 on success, 1 on usage errors.
    """
    desc = """This tool will search through a directory full of raw E3SM model time-slice output files, and find/fix any issues with the time index.
    If overlapping time segments are found, it will find the last file of the preceding segment and truncate it to match the index from the first file from the
    second segment."""
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('input', help="The directory to check for time index issues, should only contain a single time-frequency from a single case")
    # os.getcwd() instead of os.environ['PWD']: PWD is not guaranteed to be set
    # (e.g. Windows, or launched outside a shell) and would raise KeyError
    # before argument parsing even started.
    parser.add_argument('--output', default="output", required=False, help=f"output directory for rectified dataset, default is {os.getcwd()}/output")
    parser.add_argument('--move', action="store_true", required=False, help="move the files from the input directory into the output directory instead of symlinks")
    parser.add_argument('--copy', action="store_true", required=False, help="copy the files from the input directory into the output directory instead of symlinks")
    parser.add_argument('-j', '--jobs', default=8, type=int, help="the number of processes, default is 8")
    parser.add_argument('--dryrun', action="store_true", help="Collect the time segments, but dont produce the truncated files or move anything")
    parser.add_argument('--no-gaps', action="store_true", help="Exit if a time gap is discovered")
    args = parser.parse_args()
    inpath = args.input
    outpath = args.output
    num_jobs = args.jobs
    dryrun = args.dryrun

    if args.copy and args.move:
        print("Both copy and move flags are set, please only pick one")
        return 1

    if os.path.exists(outpath) and len(os.listdir(outpath)):
        print(f"Output directory {outpath} already exists and contains files")
        return 1
    else:
        os.makedirs(outpath, exist_ok=True)

    # Guard against an empty input directory: next() on an exhausted glob
    # would otherwise raise an uninformative StopIteration.
    first_file = next(Path(inpath).glob('*'), None)
    if first_file is None:
        print(f"Input directory {inpath} contains no files")
        return 1
    timename, bndsname = get_time_names(first_file.as_posix())

    segments = collect_segments(inpath, num_jobs, timename, bndsname)

    if len(segments) == 1:
        print("No overlapping segments found")
        if dryrun:
            print("not moving files")
        else:
            index, files = segments.popitem()
            _place_files(files, outpath, args)
        return 0

    # Flatten the {(start, end): files} mapping into a list ordered by start
    # time so adjacent pairs can be compared for gaps and overlaps.
    ordered_segments = [
        {'start': start, 'end': end, 'files': files}
        for (start, end), files in segments.items()
    ]
    ordered_segments.sort(key=lambda i: i['start'])

    for s1, s2 in zip(ordered_segments[:-1], ordered_segments[1:]):
        if s2['start'] > s1['end']:
            # units = get_time_units(s1['files'][0])
            # {units.split(' ')[0]}
            msg = f"There's a time gap between the end of {s1['files'][-1]} and the start of {s2['files'][0]} of {s2['start'] - s1['end']} "
            if args.no_gaps:
                raise ValueError(msg)
            else:
                print(msg)
                if not args.dryrun:
                    print('Moving files from the last segment')
                    _place_files(s1['files'], outpath, args)
                continue

        # Walk segment 1's files from the end, counting how many lie entirely
        # past the start of segment 2 (those are dropped wholesale); the first
        # file that does NOT cross the boundary stops the scan, and the file
        # at truncate_index is the one that straddles it.
        truncate_index = len(s1['files'])
        for file in s1['files'][::-1]:
            with xr.open_dataset(file, decode_times=False) as ds:
                if ds[bndsname][-1].values[1] > s2['start']:
                    truncate_index -= 1
                    continue
                else:
                    break

        if truncate_index == len(s1['files']):
            # Segments touch but no file actually crosses into s2 (the scan
            # broke on the very first file), so there is nothing to truncate.
            # The original indexed s1['files'][truncate_index] unconditionally
            # here, which raised IndexError in this edge case.
            if not dryrun:
                _place_files(s1['files'], outpath, args)
            continue

        print(f"removing {len(s1['files']) - truncate_index} files from ({s1['start']}, {s1['end']})")

        new_ds = xr.Dataset()
        to_truncate = s1['files'][truncate_index]  # the file that needs to be truncated
        with xr.open_dataset(to_truncate, decode_times=False) as ds:
            # Count how many time steps of the straddling file precede (or
            # exactly meet) the start of segment 2; only those are kept.
            target_index = 0
            for i in range(0, len(ds[bndsname])):
                if ds[bndsname][i].values[1] == s2['start']:
                    target_index += 1
                    break
                target_index += 1

            print(f"truncating {to_truncate} by removing {len(ds[bndsname]) - target_index} time steps")

            new_ds.attrs = ds.attrs
            for variable in ds.data_vars:
                # NOTE(review): this tests the literal coord name 'time' while
                # comparing timename against 'Time' — presumably meant to skip
                # variables without a time dimension; confirm against the
                # coordinate conventions of the input files.
                if 'time' not in ds[variable].coords and timename != 'Time':
                    new_ds[variable] = ds[variable]
                    new_ds[variable].attrs = ds[variable].attrs
                    continue
                if timename == 'time':
                    new_ds[variable] = ds[variable].isel(time=slice(0, target_index))
                else:
                    new_ds[variable] = ds[variable].isel(Time=slice(0, target_index))
                new_ds[variable].attrs = ds[variable].attrs

        _, to_truncate_name = os.path.split(to_truncate)
        outfile_path = os.path.join(outpath, f"{to_truncate_name[:-3]}.trunc.nc")

        if dryrun:
            print(f"not writing out file {outfile_path}")
        else:
            print(f"writing out {outfile_path}")
            write_netcdf(new_ds, outfile_path, unlimited=[timename])

        if dryrun:
            print("not moving files")
        else:
            print(f"Moving the first {truncate_index} files")
            _place_files(s1['files'][:truncate_index], outpath, args)

    # The last segment is never the "s1" of any pair above, so its files are
    # placed here in full.
    if dryrun:
        print("not moving files")
    else:
        print('Moving files from the last segment')
        _place_files(ordered_segments[-1]['files'], outpath, args)

    return 0