Example #1
0
def setup():
    """Split the dataset into train/validate sets and move the files.

    Reads the command-line options, loads the (file name, label) pairs from
    the train-master file inside the dataset directory, groups them by label,
    and splits every label group so that ``int(n * validate_ratio)`` entries
    from the end of the group go to the validation set and the rest go to the
    training set.  The files are then moved from the no-label directory into
    the ``train`` and ``validate`` sub-directories of the dataset directory.
    Finally a ``weights`` directory is created in the current working
    directory if it does not exist yet.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--shuffle', action='store_true')
    parser.add_argument('--dataset-dir',
                        default='dataset',
                        type=str,
                        metavar='DIR')
    parser.add_argument('--nolabel-dir',
                        default='nolabel',
                        type=str,
                        metavar='DIR')
    parser.add_argument('--train-master',
                        default='train_master.tsv',
                        type=str,
                        metavar='FILE')
    parser.add_argument('--validate_ratio',
                        default=0.2,
                        type=float,
                        help='validate (default==>2/10)')
    # Parse the command line exactly once.
    args = parser.parse_args()

    nolabel_dir = os.path.join(args.dataset_dir, args.nolabel_dir)
    train_dir = os.path.join(args.dataset_dir, 'train')
    validate_dir = os.path.join(args.dataset_dir, 'validate')

    print('==> Getting dataset information..')
    ds_info = get_dataset_info(
        os.path.join(args.dataset_dir, args.train_master))

    # Group the entries by label so the split below is done per label.
    label_div = dict()
    for file_name, label in ds_info:
        label_div.setdefault(label, []).append([file_name, label])

    print('==> Deviding a dataset..')
    train_ds = list()
    validate_ds = list()
    for dataset in label_div.values():
        n_ds = len(dataset)
        # Index of the first validation entry within this label group.
        div_i = n_ds - int(n_ds * args.validate_ratio)
        if args.shuffle:
            random.shuffle(dataset)
        train_ds.extend(dataset[:div_i])
        validate_ds.extend(dataset[div_i:])

    print('==> Moving a dataset..')
    move_files(train_ds, nolabel_dir, train_dir)
    move_files(validate_ds, nolabel_dir, validate_dir)

    os.makedirs('weights', exist_ok=True)
Example #2
0
# Command entry point: forwards the two declared parameters unchanged to
# move_files().
# NOTE(review): Argument/Option look like typer-style parameter declarations;
# confirm against the file's imports.
def move_to_folders(
        # Path to operate on; defaults to '.' and is declared as an existing,
        # readable file or directory whose path is resolved.
        path: Path = Argument(default='.',
                              exists=True,
                              file_okay=True,
                              dir_okay=True,
                              readable=True,
                              resolve_path=True),
        # Boolean option, off by default; passed through as the second
        # positional argument of move_files().
        create_folders: bool = Option(default=False, ),
):
    move_files(path, create_folders)
def place_trials_default(expnum, start_time, end_time, verbose=False):
    """Create and upload one trial per new database event in a time window.

    Every event returned by ``smysql.retrieve_event_description`` for the
    experiment's sites between ``start_time`` and ``end_time`` is processed in
    order.  An event is skipped when its event id is already a value of the
    cached evid dictionary (this is logged) or when no file records are
    returned for it.  For every remaining event a trial structure is
    generated, the trial is posted, its data files are moved, the report,
    readme and KML files are moved into the trial's Documentation folder and
    uploaded, the report source folder is cleaned up, and the caches are
    updated before moving on to the next trial number.

    WARNING (from the original author): this will not realize if you've
    pointed it to a folder that it already uploaded.

    ``verbose`` is accepted but not used inside this function.
    """
    destination = experiment_path[expnum]
    trial_number = utils.find_last_trial(expnum) + 1
    uploaded_evids = caching.load_evid_dictionary(expnum)
    events = smysql.retrieve_event_description(start_time, end_time, list_of_sites=mySQL_sitedef[expnum])
    default_folder = smysql.retrieve_data_folder()

    # Names of the documentation files handled for every trial, in upload order.
    report_name = "report.csv"
    readme_name = "readme.pdf"
    events_kml = "event.kml"
    doc_files = [report_name, readme_name, events_kml]

    # Walk through every event the database holds for the time window.
    for event in events:
        evt_number = event[cfg_evt_siteEvt]
        evt_time = event[cfg_evt_time]
        evt_id = event[cfg_evt_evid]
        evt_dist = event[cfg_evt_dist]
        evt_ml = event[cfg_evt_ml]
        file_records = smysql.retrieve_file_location(evt_number, mySQL_stadef[expnum])

        # Already uploaded: report it and go on with the next event.
        if evt_id in uploaded_evids.values():
            nees_logging.log_existing_evid(evt_id)
            continue

        # Nothing to do when the event has no data.
        if file_records == []:
            continue

        # Build the trial's file structure locally and on the hub.
        description = utils.generate_description(event)
        trial_title = datetime.datetime.utcfromtimestamp(evt_time).strftime(default_time_format)
        trial_root = "%sTrial-%s/" % (destination, trial_number)
        doc_folder = trial_root + "Documentation/"
        report_source = "%sRep-1/%s/" % (trial_root, cfg_hub_ext_fold[".txt"])
        utils.generate_trial_structure(destination, trial_number)
        shttp.post_full_trial(shttp.experiment_id_dic[expnum], trial_title, description, trial_number)

        # Move the event's data files, then the documentation files, and upload the latter.
        move_datafiles(file_records, event, destination, trial_number, doc_folder, default_folder, expnum)
        utils.move_files(report_source, doc_folder, doc_files)
        for doc_name in doc_files:
            snupload.upload_reportfile(expnum, trial_number, doc_folder, doc_name)
        utils.clean_up(report_source)

        # Update the caches, then advance to the next trial number.
        nees_logging.log_goto_nextline(neeshub_log_filename)
        caching.update_all_cache_dictionaries(expnum, trial_number, evt_id, evt_ml, evt_dist)
        trial_number += 1
Example #4
0
def substitute_ad_files_and_upgrade_ad_tactics():
    """Download a fresh ``h2y.dat``, install it and switch ad blocking on.

    The file is always downloaded, moved to ``/usr/bin/v2ray/`` and ad
    blocking is written as "on".  Only when an old rules file existed (it is
    removed first) is the maintenance script additionally run; in that case a
    crontab line for it is appended first if the cron file does not mention
    the script yet.
    """
    download_files(f_url = "https://raw.githubusercontent.com/ToutyRater/V2Ray-SiteDAT/master/geofiles/h2y.dat", f_name = "h2y.dat")

    had_old_rules = os.path.exists(ad_rules_file)
    if had_old_rules:
        os.remove(ad_rules_file)

    move_files("h2y.dat", "/usr/bin/v2ray/")
    writejson.WriteAD("on")

    # Without a previous rules file there is nothing more to do.
    if not had_old_rules:
        return

    # Register the weekly maintenance job unless the cron file already has it.
    if not re.search(r'/v2ray.fun/maintain.sh', cronfile.read()):
        os.system("sed -i '$i 30 4    * * 0   root    bash /usr/local/v2ray.fun/maintain.sh' /etc/crontab")
    os.system("bash /usr/local/v2ray.fun/maintain.sh")
    print ("设置成功!")
Example #5
0
    def error_cleanup(self,
                      input_schema_name,
                      input_table_name,
                      run_id,
                      path=None,
                      conn_metadata=None,
                      conn_source=None,
                      conn_target=None,
                      target_path=None):
        """Clean up after a failed load and mark the control entry as errored.

        Args:
            input_schema_name: Schema name passed to the file helpers and to
                ``update_control``.
            input_table_name: Table name; when ``None`` the control status is
                not updated.
            run_id: Run identifier forwarded to ``update_control``.
            path: Staging path; defaults to
                ``self.config_list['misc_hdfsStagingPath']`` when ``None``.
            conn_metadata: Optional connection, closed if not already closed.
            conn_source: Optional connection, closed if not already closed.
            conn_target: Optional connection, closed unconditionally when
                given.
            target_path: Destination used by the "KFK2Hive" and "SRC2Hive"
                data paths; nothing is moved when it is ``None``.
        """
        method_name = self.class_name + ": " + "error_cleanup"
        print_hdr = "[" + method_name + ": " + self.data_path + ": " + str(
            self.load_id) + "] - "
        print(logdt.now().strftime('[%Y-%m-%d %H:%M:%S] ') + print_hdr +
              "Entered")

        if path is None:
            path = self.config_list['misc_hdfsStagingPath']

        # Substring tests replace the Python-2-only ``find(...) <> -1`` form.
        if "GP2HDFS" in self.data_path or "HDFS2MIR" in self.data_path:
            remove_files(path, input_schema_name, input_table_name)

        if "KFK2Hive" in self.data_path:
            if path is not None and target_path is not None:
                move_hdfs_files(path, target_path)

        if "SRC2Hive" in self.data_path:
            if path is not None and target_path is not None:
                move_files(path, (target_path + input_schema_name))

        if input_table_name is not None:
            self.update_control(input_schema_name, input_table_name,
                                self.CONTROL_STATUS_ERROR, run_id)

        if conn_metadata is not None and not conn_metadata.closed:
            conn_metadata.close()

        if conn_source is not None and not conn_source.closed:
            conn_source.close()

        # The target connection is closed without consulting ``closed``.
        if conn_target is not None:
            conn_target.close()
Example #6
0
    def _init_gallery(self, download_item):
        """Register a downloaded item and request a fetch for its file.

        Args:
            download_item(:class:`.gallery_downloader_item_obj.GalleryDownloaderItemObject`):
            Downloaded item.
        """
        assert isinstance(download_item, GalleryDownloaderItem)
        # NOTE: try to use ehen's apply_metadata first
        # manager have to edit item.metadata to match this method
        source_path = download_item.item.file
        app_constants.TEMP_PATH_IGNORE.append(os.path.normcase(source_path))

        # Index the item under its current path and, to be safe, also under
        # the path reported by move_files(only_path=True).
        self._download_items[source_path] = download_item
        moved_path = utils.move_files(source_path, only_path=True)
        self._download_items[moved_path] = download_item

        if download_item.item.download_type == app_constants.DOWNLOAD_TYPE_OTHER:
            pass # do stuff here?

        self.init_fetch_instance.emit([source_path])
    def make_gallery(self, new_gallery, add_to_model=True, new=False):
        """Fill ``new_gallery`` from the dialog widgets and return it.

        Nothing is done (and ``None`` is returned) when ``self.check()`` is
        falsy.  Otherwise title, artist, path, description, type, language,
        status, tags, publication date and link are copied from the widgets.
        When ``new`` is true and ``gui_constants.MOVE_IMPORTED_GALLERIES`` is
        set, the gallery path is the result of ``utils.move_files``.  If the
        gallery has no chapters, ``self.set_chapters`` is run in a daemon
        thread that is joined before returning.
        """
        if not self.check():
            return None

        new_gallery.title = self.title_edit.text()
        log_d("Adding gallery title")
        new_gallery.artist = self.author_edit.text()
        log_d("Adding gallery artist")
        log_d("Adding gallery path")
        if new and gui_constants.MOVE_IMPORTED_GALLERIES:
            gui_constants.OVERRIDE_MONITOR = True
            gallery_path = utils.move_files(self.path_lbl.text())
        else:
            gallery_path = self.path_lbl.text()
        new_gallery.path = gallery_path
        new_gallery.info = self.descr_edit.toPlainText()
        log_d("Adding gallery descr")
        new_gallery.type = self.type_box.currentText()
        log_d("Adding gallery type")
        new_gallery.language = self.lang_box.currentText()
        log_d("Adding gallery lang")
        new_gallery.status = self.status_box.currentText()
        log_d("Adding gallery status")
        new_gallery.tags = utils.tag_to_dict(self.tags_edit.toPlainText())
        log_d("Adding gallery: tagging to dict")

        # Combine the date from the widget with the stored gallery time, or
        # with the current time (seconds precision) when none is stored.
        pub_text = self.pub_edit.date().toString("ddMMyyyy")
        pub_day = datetime.strptime(pub_text, "%d%m%Y").date()
        try:
            pub_time = self.gallery_time
        except AttributeError:
            pub_time = datetime.now().time().replace(microsecond=0)
        new_gallery.pub_date = datetime.combine(pub_day, pub_time)
        log_d("Adding gallery pub date")
        new_gallery.link = self.link_lbl.text()
        log_d("Adding gallery link")

        if not new_gallery.chapters:
            log_d("Starting chapters")
            worker = threading.Thread(target=self.set_chapters, args=(new_gallery, add_to_model), daemon=True)
            worker.start()
            worker.join()
            log_d("Finished chapters")
        return new_gallery
Example #8
0
    def _init_gallery(self, download_item):
        """Record a downloaded item and emit a fetch request for its file.

        Args:
            download_item(:class:`.gallery_downloader_item_obj.GalleryDownloaderItemObject`):
            Downloaded item.
        """
        assert isinstance(download_item, GalleryDownloaderItem)
        # NOTE: try to use ehen's apply_metadata first
        # manager have to edit item.metadata to match this method
        downloaded_file = download_item.item.file
        ignore_entry = os.path.normcase(downloaded_file)
        app_constants.TEMP_PATH_IGNORE.append(ignore_entry)

        self._download_items[downloaded_file] = download_item
        # better safe than sorry: also key the item by the path that
        # move_files(only_path=True) returns
        relocated_file = utils.move_files(downloaded_file, only_path=True)
        self._download_items[relocated_file] = download_item

        if download_item.item.download_type == app_constants.DOWNLOAD_TYPE_OTHER:
            pass  # do stuff here?

        self.init_fetch_instance.emit([downloaded_file])
Example #9
0
 def make_gallery(self, new_gallery, add_to_model=True, new=False):
     """Populate ``new_gallery`` from the dialog widgets and return it.

     Returns ``None`` without touching the gallery when ``self.check()`` is
     falsy.  When ``new`` is true and
     ``app_constants.MOVE_IMPORTED_GALLERIES`` is set, the path comes from
     ``utils.move_files``; otherwise it is the text of the path label.  A
     gallery without chapters gets them from ``self.set_chapters``, run in a
     daemon thread that is joined before returning.
     """
     if not self.check():
         return None

     new_gallery.title = self.title_edit.text()
     log_d('Adding gallery title')
     new_gallery.artist = self.author_edit.text()
     log_d('Adding gallery artist')
     log_d('Adding gallery path')
     if new and app_constants.MOVE_IMPORTED_GALLERIES:
         app_constants.OVERRIDE_MONITOR = True
         chosen_path = utils.move_files(self.path_lbl.text())
     else:
         chosen_path = self.path_lbl.text()
     new_gallery.path = chosen_path
     new_gallery.info = self.descr_edit.toPlainText()
     log_d('Adding gallery descr')
     new_gallery.type = self.type_box.currentText()
     log_d('Adding gallery type')
     new_gallery.language = self.lang_box.currentText()
     log_d('Adding gallery lang')
     new_gallery.status = self.status_box.currentText()
     log_d('Adding gallery status')
     new_gallery.tags = utils.tag_to_dict(self.tags_edit.toPlainText())
     log_d('Adding gallery: tagging to dict')

     # Date part comes from the widget; the time part is the stored
     # gallery time or, failing that, the current time without microseconds.
     date_text = self.pub_edit.date().toString("ddMMyyyy")
     date_part = datetime.strptime(date_text, "%d%m%Y").date()
     try:
         time_part = self.gallery_time
     except AttributeError:
         time_part = datetime.now().time().replace(microsecond=0)
     new_gallery.pub_date = datetime.combine(date_part, time_part)
     log_d('Adding gallery pub date')
     new_gallery.link = self.link_lbl.text()
     log_d('Adding gallery link')

     if not new_gallery.chapters:
         log_d('Starting chapters')
         chapter_worker = threading.Thread(
             target=self.set_chapters,
             args=(new_gallery, add_to_model),
             daemon=True,
         )
         chapter_worker.start()
         chapter_worker.join()
         log_d('Finished chapters')
     return new_gallery
Example #10
0
def move_files():
    """Run ``utils.move_files`` with no arguments.

    The import is done at call time, under a private alias so it does not
    shadow this wrapper's own name inside the body.
    """
    from utils import move_files as _move_files_impl
    _move_files_impl()
Example #11
0
    if_open_ad_function = "广告拦截功能: 未开启"
else:
    if_open_ad_function = "广告拦截功能: 开启"

# Show the current ad-blocking status followed by the menu.
print("")
print(if_open_ad_function)

print("")
for menu_entry in ("1. 开启", "2. 关闭", "3. 更新广告过滤策略"):
    print(menu_entry)

choice = raw_input("请选择: ")

if choice == "2":
    writejson.WriteAD("off")
elif choice in ("1", "3"):
    if choice == "3":
        # Refresh the rules: download the new file, drop the old one if it
        # exists, and install the download.
        download_files(
            f_url=
            "https://raw.githubusercontent.com/ToutyRater/V2Ray-SiteDAT/master/geofiles/h2y.dat",
            f_name="h2y.dat")
        if os.path.exists(ad_rules_file):
            os.remove(ad_rules_file)
        move_files("h2y.dat", "/usr/bin/v2ray/")
    # Both "1" and "3" end by switching ad blocking on.
    writejson.WriteAD("on")
Example #12
0
    def fs2hdfs_hive_log(self):
        """Collect hiveserver2 log files from the Hive hosts and copy them to HDFS.

        Steps, in order:
          1. Read the control-table row for ``default.hive_log_ext`` with data
             path ``FS2HDFS`` and copy its columns onto ``self`` / locals.
          2. Recreate one local staging directory per host and the local
             ``in_progress`` directory; create the HDFS ``in_progress``
             directory when it does not exist.
          3. ``scp`` the current log and the dated logs since the last run
             date from every host.
          4. Unzip ``.gz`` files, suffix every file name with the host name,
             and move the files to the local ``in_progress`` directory.
          5. ``hadoop distcp`` the local ``in_progress`` directory into HDFS.
          6. Set ``last_run_time = now()`` in the control table.

        Returns:
            An error message string on the two early-return paths (control
            table query failure, no control entry).  Most other failures call
            ``sys.exit`` before their ``return`` statement.  Falls off the end
            (``None``) on success.

        NOTE(review): this is Python 2 code (``print`` statements, ``<>``).
        """
        hosts = []
        # Get information about the table to load
        try:
            metadata_sql = "SELECT * FROM sync.control_table \
                        WHERE target_tablename = 'hive_log_ext' \
                            AND target_schemaname = 'default'" + " \
                            AND data_path = " + "'FS2HDFS'"

            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                  "metadata_sql: " + metadata_sql)
            conn_metadata, cur_metadata = dbConnect(self.metastore_dbName,
                                                    self.dbmeta_User,
                                                    self.dbmeta_Url,
                                                    self.dbmeta_Pwd)
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                  "before connecting to metastore controls")
            controls = dbQuery(cur_metadata, metadata_sql)
            # print (datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + "metastore controls:", controls)
        except psycopg2.Error as e:
            error = 2
            # NOTE(review): the message has no placeholder, so .format(error)
            # leaves it unchanged (same pattern throughout this method).
            err_msg = "Error connecting to control table database".format(
                error)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print output_msg
            # NOTE(review): the return comes first here, so the sys.exit below
            # is never reached; later error paths use the opposite order.
            return output_msg
            sys.exit(error)
        finally:
            # NOTE(review): conn_metadata is only bound by dbConnect above; if
            # dbConnect itself raises, this line fails on an unbound local.
            conn_metadata.close()

        if not controls:
            error = 3
            err_msg = "No Entry found in control table".format(error)
            status = 'Job Error'
            output_msg = "No Entry found in control table"
            return output_msg
            sys.exit(error)

        # Copy the first control row into attributes and locals.
        # NOTE(review): several of the locals below (partitioned, custom_sql,
        # the partition/distribution columns) are not read again in this method.
        self.id = str(controls[0]['id'])
        self.source_schema = str(controls[0]['source_schemaname'])
        self.source_tablename = str(controls[0]['source_tablename'])
        self.target_schema = str(controls[0]['target_schemaname'])
        self.target_tablename = str(controls[0]['target_tablename'])
        partitioned = controls[0]['is_partitioned']
        self.load_type = str(controls[0]['load_type'])
        self.s3_backed = controls[0]['s3_backed']
        first_partitioned_column = str(controls[0]['first_partitioned_column'])
        second_partitioned_column = str(
            controls[0]['second_partitioned_column'])
        partitioned_column_transformation = str(
            controls[0]['partition_column_transformation'])
        custom_sql = str(controls[0]['custom_sql'])
        self.join_columns = str(controls[0]['join_columns'])
        self.archived_enabled = controls[0]['archived_enabled']
        distribution_columns = str(controls[0]['distribution_columns'])
        dist_col_transformation = str(controls[0]['dist_col_transformation'])
        self.log_mode = str(controls[0]['log_mode'])
        # str() turns a missing value into the literal 'None', which is what
        # the last-run check further down compares against.
        self.last_run_time = str(controls[0]['last_run_time'])

        # Remote log file, local staging directory and HDFS staging directory.
        incoming_path = self.paths + "/hiveserver2.log"
        local_inprogress_path = self.local_staging_path + "/in_progress/"
        inprogress_path = self.staging_path + self.target_schema + "/" + self.target_tablename + "/in_progress/"
        hosts = self.hive_hosts.split(',')
        print hosts
        # Creating the Local in_progress and/or clearing that location for new incoming files
        for host in hosts:
            print "Inside Host path check"
            # NOTE(review): no separator is inserted between the staging path
            # and the host name; presumably local_staging_path ends with '/'.
            path_to_check = self.local_staging_path + host
            print path_to_check
            path_check = glob.glob(path_to_check)
            print path_check
            if len(path_check) > 0:
                print "Path exists... Clearing the directory"
                (ret, out, err) = run_cmd(['rm', '-rf', (path_to_check)])
                print(ret, out, err)
                if ret:
                    error = 1
                    err_msg = "Error while cleaning in_progress location in Local FS".format(
                        error)
                    print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                          err)
                    status = 'Job Error'
                    # NOTE(review): no exception is being handled here, so the
                    # formatted traceback will not describe the failed command
                    # (same pattern in the run_cmd error paths below).
                    output_msg = traceback.format_exc()
                    print output_msg
                    # sys.exit runs first, so the return below is not reached.
                    sys.exit(error)
                    return output_msg

            (ret, out, err) = run_cmd(['mkdir', '-p', path_to_check])
            if ret:
                error = 1
                err_msg = "Error while creating in_progress location in Local FS".format(
                    error)
                print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                status = 'Job Error'
                output_msg = traceback.format_exc()
                print output_msg
                sys.exit(error)
                return output_msg

        # Same clear-then-create sequence for the shared local in_progress directory.
        path_check = glob.glob(local_inprogress_path)
        if len(path_check) > 0:
            print "Path exists... Clearing the directory"
            (ret, out, err) = run_cmd(['rm', '-rf', (local_inprogress_path)])
            print(ret, out, err)
            if ret:
                error = 1
                err_msg = "Error while cleaning in_progress location in Local FS".format(
                    error)
                print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                status = 'Job Error'
                output_msg = traceback.format_exc()
                print output_msg
                sys.exit(error)
                return output_msg
        (ret, out, err) = run_cmd(['mkdir', '-p', local_inprogress_path])
        if ret:
            error = 1
            err_msg = "Error while creating in_progress location in Local FS".format(
                error)
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print output_msg
            sys.exit(error)
            return output_msg

        # Create the HDFS in_progress location when it does not exist yet.
        # (Clearing an existing location is disabled; see the commented-out
        # branch below.)
        (ret, out,
         err) = run_cmd(["hadoop", "fs", "-test", "-e", inprogress_path])
        if ret:
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                  "Directory does not exist ... Creating...")
            (ret, out,
             err) = run_cmd(["hadoop", "fs", "-mkdir", "-p", inprogress_path])
            if ret:
                error = 1
                err_msg = "Error while creating in_progress location in HDFS".format(
                    error)
                print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                status = 'Job Error'
                output_msg = traceback.format_exc()
                print output_msg
                sys.exit(error)
                return output_msg
        # else:
        #     (ret, out, err) = run_cmd(["hadoop", "fs", "-rm", "-r", inprogress_path + "*"])
        #     if ret:
        #         if err.find("No such file or directory") <> -1:
        #             pass
        #         else:
        #             error = 1
        #             err_msg = "Error while cleaning in_progress location in HDFS".format(error)
        #             print (datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
        #             status = 'Job Error'
        #             output_msg = traceback.format_exc()
        #             print output_msg
        #             return output_msg

        # Check the last run time of the table and bring over the files from
        # each host since that date.
        # NOTE(review): `date` is imported here but not used in this method.
        from datetime import date, timedelta
        if self.last_run_time == 'None':
            # No previous run recorded: use "now", which makes the day delta
            # below zero so only the current log file is copied.
            self.last_run_time = str(datetime.now())
        print "Last Run Time : ", self.last_run_time
        # Split "<date> <time>"; only the date part is used afterwards.
        lr_dt, lr_ts = self.last_run_time.split()
        lr_dt = datetime.strptime(lr_dt, "%Y-%m-%d").date()
        today = datetime.now().date()
        delta = today - lr_dt
        # hosts = self.hive_hosts.split(',')
        print hosts
        for host in hosts:
            # Always copy the current (undated) log file first.
            (ret, out, err) = run_cmd([
                'scp', ('hdp@' + host + ':' + incoming_path),
                (self.local_staging_path + host + "/")
            ])
            print ret, out, err
            if ret > 0:
                error = 1
                err_msg = "Error while moving Current Log File to Local in_progress location".format(
                    error)
                print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                status = 'Job Error'
                output_msg = traceback.format_exc()
                print err_msg, output_msg
                sys.exit(error)
                return output_msg
            # Then the dated log files for every day from the last run date up
            # to, but not including, today.
            for i in range(delta.days):
                dt = (lr_dt + timedelta(days=i))
                dtstr = dt.isoformat()
                print dtstr
                (ret, out, err) = run_cmd([
                    'scp',
                    ('hdp@' + host + ':' + incoming_path + '.' + dtstr + '*'),
                    (self.local_staging_path + host + "/")
                ])
                print ret, out, err
                if ret > 0:
                    # A missing dated file is tolerated; any other scp failure aborts.
                    if err.find('No such file or directory') <> -1:
                        pass
                    else:
                        error = 1
                        err_msg = "Error while moving data to in_progress location".format(
                            error)
                        print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                              err)
                        status = 'Job Error'
                        output_msg = traceback.format_exc()
                        print output_msg
                        sys.exit(error)
                        return output_msg

        # Unzip the files if there are any zipped files; every resulting file
        # name gets "_<host>" appended.
        for host in hosts:
            files = glob.glob((self.local_staging_path + host + "/*"))
            for file in files:
                # NOTE(review): matches ".gz" anywhere in the path, and the
                # replace() below rewrites every occurrence, not just a suffix.
                if file.find(".gz") <> -1:
                    try:
                        with gzip.open(file, 'rb') as f_in:
                            with open((file.replace('.gz', '_') + host),
                                      'wb') as f_out:
                                shutil.copyfileobj(f_in, f_out)
                    except Exception as e:
                        error = 4
                        err_msg = "Error while unzipping file in Local FS"
                        output_msg = traceback.format_exc()
                        print err_msg, output_msg
                        sys.exit(error)
                        return output_msg
                    #(ret,out,err)       = run_cmd(['gunzip', '-c', file, ' > ','test')])
                    # (ret, out, err) = run_cmd(['gunzip', file])
                    #(ret, out, err) = run_cmd(['zcat',  file, '>', (file.replace('.gz', '_') + host)])
                    # if ret > 0:
                    #     error = 1
                    #     err_msg = "Error while unzipping file in Local FS".format(error)
                    #     print (datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                    #     status = 'Job Error'
                    #     output_msg = traceback.format_exc()
                    #     print err_msg, output_msg
                    #     return output_msg
                    # Remove the compressed original once it has been unpacked.
                    (ret, out, err) = run_cmd(['rm', '-f', file])
                    if ret > 0:
                        error = 1
                        err_msg = "Error while removing zipped file in Local FS".format(
                            error)
                        print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                              err)
                        status = 'Job Error'
                        output_msg = traceback.format_exc()
                        print err_msg, output_msg
                        sys.exit(error)
                        return output_msg
                else:
                    # Plain file: just rename it with the host suffix.
                    (ret, out,
                     err) = run_cmd(['mv', file, (file + '_' + host)])
                    if ret > 0:
                        error = 1
                        err_msg = "Error while renaming file in Local FS".format(
                            error)
                        print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                              err)
                        status = 'Job Error'
                        output_msg = traceback.format_exc()
                        print err_msg, output_msg
                        sys.exit(error)
                        return output_msg

            # Move the final set of files to the in_progress location so they
            # can be sent to HDFS.
            # NOTE(review): the result of move_files is not captured, so the
            # `ret` tested below is left over from the last run_cmd call, not
            # from this move.
            move_files((self.local_staging_path + host + "/*"),
                       local_inprogress_path)
            if ret > 0:
                error = 1
                err_msg = "Error while moving files to in_progress location in Local FS".format(
                    error)
                print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
                status = 'Job Error'
                output_msg = traceback.format_exc()
                print err_msg, output_msg
                sys.exit(error)
                return output_msg


        # Ingest into HDFS: copy everything under the local in_progress
        # directory to the HDFS in_progress directory, overwriting.

        (ret, out, err) = run_cmd([
            'hadoop', 'distcp', '-overwrite',
            'file:///' + (local_inprogress_path + "/*"),
            'hdfs:///' + inprogress_path
        ])
        if ret > 0:
            error = 1
            err_msg = "Error while moving files to HDFS from Local in_progress path".format(
                error)
            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + err)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print err_msg, output_msg
            sys.exit(error)
            return output_msg

        # Record the run time in the control table.
        # NOTE(review): this UPDATE filters on target_tablename 'hive_log',
        # while the SELECT at the top reads 'hive_log_ext' - confirm which one
        # is intended.
        # NOTE(review): no commit is visible after execute(); presumably the
        # connection autocommits - confirm.
        try:
            metadata_sql = "UPDATE sync.control_table SET last_run_time = now() \
                        WHERE target_tablename = 'hive_log' \
                            AND target_schemaname = 'default'" + " \
                            AND data_path = " + "'FS2HDFS'"

            print(datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') +
                  "metadata_sql: " + metadata_sql)
            conn_metadata, cur_metadata = dbConnect(self.metastore_dbName,
                                                    self.dbmeta_User,
                                                    self.dbmeta_Url,
                                                    self.dbmeta_Pwd)
            cur_metadata.execute(metadata_sql)
            # print (datetime.now().strftime('[%Y-%m-%d %H:%M:%S] ') + "metastore controls:", controls)
        except psycopg2.Error as e:
            error = 2
            err_msg = "Error connecting to control table database".format(
                error)
            status = 'Job Error'
            output_msg = traceback.format_exc()
            print output_msg
            sys.exit(error)
            return output_msg
        finally:
            # Closes the connection whether or not the update succeeded.
            conn_metadata.close()
Example #13
0
    mo = regex.search(new)
    assert mo is not None
    mo = regex.search(new)
    assert mo is not None

#
# move_files
#

# Test of utils.move_files into a separate output folder.
# (The printed message is roughly "modify outside the current folder".)
print('不在当前文件夹中修改')
# Start from an empty NEW_IMAGE_FOLDER: delete it and create it again.
# NOTE(review): rmtree raises if the folder does not exist yet.
shutil.rmtree(NEW_IMAGE_FOLDER)
os.mkdir(NEW_IMAGE_FOLDER)
old_names = IMAGES
new_names = utils.construct_new_names(old_names, NEW_IMAGE_FOLDER)

# Sources are passed in sorted order, targets in the order construct_new_names returned them.
utils.move_files(utils.sort_image_file_paths(old_names), new_names)

# Marker substrings; the loop below replaces each with the image path that
# contains it (compared against the lower-cased path).
first_flaged_file = '2x2'
second_flaged_file = '10x3'
third_flaged_file = '12x03'

# NOTE(review): once a marker has been replaced by a path, later iterations
# test that path (not the original marker) against the remaining images.
for image in IMAGES:
    if first_flaged_file in image.lower():
        first_flaged_file = image
    if second_flaged_file in image.lower():
        second_flaged_file = image
    if third_flaged_file in image.lower():
        third_flaged_file = image

# The flagged source files must have the same size as the files at positions
# 4 and 29 of new_names.  third_flaged_file is resolved above but not checked.
assert os.stat(first_flaged_file).st_size == os.stat(new_names[4]).st_size
assert os.stat(second_flaged_file).st_size == os.stat(new_names[29]).st_size
Example #14
0
def main():
    """Crop every annotated object out of the images, then move the crops.

    Uses the module-level ``args`` (``annotation_path``, ``image_path``,
    ``save_path``).  Annotation and image files are paired by their sorted
    position; a pair whose cleaned names differ is reported and skipped.  For
    every ``object`` element in a pair's XML file the bounding box is cropped
    from the image and saved as ``<image>_<class>_<index>.jpg`` in
    ``save_path``.  Finally ``utils.move_files`` is run on ``save_path``.

    Raises:
        AssertionError: if the number of annotation files differs from the
            number of image files.
    """
    # Preprocessing: delete unmatched files and unify the file-name structure.
    preprocessing.remove_rename(args)

    # save_path check
    utils.check_directory(args.save_path)

    # read files & sorting
    annotation_files = os.listdir(args.annotation_path)
    images_files = os.listdir(args.image_path)

    annotation_files_sort = sorted(annotation_files)
    images_files_sort = sorted(images_files)
    # Files are paired by position, so both directories must hold the same
    # number of entries.  The condition and message must not be wrapped in
    # parentheses together: asserting a non-empty tuple always passes.
    assert len(annotation_files_sort) == len(images_files_sort), \
        '파일 개수가 맞지 않음 anno : {0}, images : {1}'.format(
            len(annotation_files), len(images_files))

    # start
    print('Crop start')
    start = time.time()  # remember the start time
    crop_image_count = 0
    paired_files = zip(annotation_files_sort, images_files_sort)
    for file_index, (raw_annotation, raw_image) in enumerate(paired_files):
        annotation_file = utils.tag_remove_parser(raw_annotation)
        images_file = utils.tag_remove_parser(raw_image)

        # .DS_Store: file-system artifact created by macOS.
        if (annotation_file != images_file or annotation_file == '.DSStore'
                or images_file == '.DSStore'):
            print('파일명이 일치 하지 않음 {0} 번째 파일'.format(file_index))
            continue

        # read xml, image files
        tree = parse(
            os.path.join(args.annotation_path, annotation_file + '.xml'))
        origin_image = Image.open(
            os.path.join(args.image_path, images_file + '.jpg'))

        # One <object> element per annotated instance.
        elements = tree.getroot().findall("object")
        # Get class names
        names = [element.findtext("name") for element in elements]
        # Get annotations: xml -> object -> bndbox -> (xmin, ymin, xmax, ymax)
        boxes = []
        for element in elements:
            bndbox = element.find('bndbox')
            boxes.append(
                tuple(
                    int(bndbox.find(tag).text)
                    for tag in ('xmin', 'ymin', 'xmax', 'ymax')))

        # image crop & save
        for object_index, (name, bndbox_area) in enumerate(zip(names, boxes)):
            crop_image = origin_image.crop(bndbox_area)
            crop_image.save(
                os.path.join(
                    args.save_path,
                    '{0}_{1}_{2}.jpg'.format(images_file, name,
                                             object_index)))
            # count generated images
            crop_image_count += 1
    print('Crop end')
    print('생성된 이미지 수 :', crop_image_count)
    print("Crop time :", time.time() - start)

    print('File move start')
    start = time.time()  # remember the start time
    utils.move_files(args.save_path)
    print("Move time :", time.time() - start)
    print('File move end')
Example #15
0
def _draw_indices(pool, size):
    """Return ``size`` distinct entries of ``pool`` drawn with np.random.choice."""
    # NOTE(review): older NumPy releases appear to raise on an empty
    # population even when size is 0 -- confirm against the pinned version.
    # Only that degenerate case is short-circuited; every other call is
    # forwarded unchanged so the sequence of random draws stays the same.
    if not pool and size == 0:
        return []
    return np.random.choice(pool, size=size, replace=False).tolist()


def _split_indices(n, train_ratio, val_ratio):
    """Randomly partition ``range(n)`` into (train, val, test) index lists."""
    remaining = list(range(n))
    train_idx = _draw_indices(remaining, int(n * train_ratio))
    # Set membership instead of scanning the drawn indices for every element.
    taken = set(train_idx)
    remaining = [i for i in remaining if i not in taken]
    val_idx = _draw_indices(remaining, int(n * val_ratio))
    taken = set(val_idx)
    test_idx = [i for i in remaining if i not in taken]
    return train_idx, val_idx, test_idx


def _write_annotation(path, labelled, background):
    """Write one ``name<TAB>flag`` line per image: 1 for labelled, 0 for background."""
    with open(path, 'w+') as f:
        for name in labelled:
            f.write('{}\t{}\n'.format(name, 1))
        for name in background:
            f.write('{}\t{}\n'.format(name, 0))


def preprocess(config):
    """Split the images under ``config.root`` into train/val/test folders.

    Files whose name contains 'png' are treated as labels and files whose
    name contains 'jpg' as images. A jpg is "paired" when its name minus the
    last 4 characters equals some png name minus its last 11 characters;
    every other jpg is treated as background. Paired images/labels and
    background images are split independently with the same ratios, an
    ``annotation.txt`` is written per split (1 = paired, 0 = background) and
    the files are handed to ``move_files``.

    Args:
        config: object exposing ``root`` (dataset directory) and ``ratio``
            (sequence whose first two entries are the train and validation
            weights; the rest of the data goes to test).

    Raises:
        AssertionError: if the number of paired jpgs differs from the
            number of png files.
    """
    root = config.root
    ratio = config.ratio

    # Remove key.txt (only the first *.txt match is deleted, as before).
    txt_files = glob.glob(os.path.join(root, '*.txt'))
    if txt_files:
        os.remove(txt_files[0])

    # Classify the directory content into labels, paired images, backgrounds.
    file_list = os.listdir(root)
    label_names = sorted(f for f in file_list if 'png' in f)
    png_names = [f[:-11] for f in label_names]
    png_name_set = set(png_names)
    jpg_names = [f for f in file_list if 'jpg' in f]
    not_bg_names = sorted(f for f in jpg_names if f[:-4] in png_name_set)
    bg_names = sorted(f for f in jpg_names if f[:-4] not in png_name_set)
    assert len(not_bg_names) == len(png_names), \
           'The number of pairs is mismatched'

    # Split data by 3 categories: train, val, test
    total = sum(ratio)
    train_ratio = ratio[0] / total
    val_ratio = ratio[1] / total

    # Split paired data. Images and labels share indices, which relies on the
    # two sorted lists lining up one-to-one.
    train_idx, val_idx, test_idx = _split_indices(len(not_bg_names),
                                                  train_ratio, val_ratio)
    train_images = [not_bg_names[i] for i in train_idx]
    train_labels = [label_names[i] for i in train_idx]
    val_images = [not_bg_names[i] for i in val_idx]
    val_labels = [label_names[i] for i in val_idx]
    test_images = [not_bg_names[i] for i in test_idx]
    test_labels = [label_names[i] for i in test_idx]

    print('Train Images: {0}, Train Labels: {1}\t'
          'Validation Images: {2}, Validation Labels: {3}\t'
          'Test Images: {4}, Test Labels: {5}'.format(len(train_images),
                                                      len(train_labels),
                                                      len(val_images),
                                                      len(val_labels),
                                                      len(test_images),
                                                      len(test_labels)))

    # Split background data
    train_idx, val_idx, test_idx = _split_indices(len(bg_names), train_ratio,
                                                  val_ratio)
    train_bg_images = [bg_names[i] for i in train_idx]
    val_bg_images = [bg_names[i] for i in val_idx]
    test_bg_images = [bg_names[i] for i in test_idx]

    print('Train background images: {}\t'
          'Validation background images: {}\t'
          'Test background images: {}'.format(len(train_bg_images),
                                              len(val_bg_images),
                                              len(test_bg_images)))

    splits = (
        ('train', train_images, train_bg_images, train_labels),
        ('val', val_images, val_bg_images, val_labels),
        ('test', test_images, test_bg_images, test_labels),
    )

    # Make Folders for saving
    for split, _, _, _ in splits:
        mkdir(os.path.join(root, split))
        mkdir(os.path.join(root, split, 'image'))
        mkdir(os.path.join(root, split, 'label'))

    # Add annotation information
    for split, images, bg_images, _ in splits:
        _write_annotation(os.path.join(root, split, 'annotation.txt'), images,
                          bg_images)

    # Move images, labels to directory
    for split, images, bg_images, labels in splits:
        move_files(root, root + '/' + split + '/image', images)
        move_files(root, root + '/' + split + '/image', bg_images)
        move_files(root, root + '/' + split + '/label', labels)
Example #16
0
				def create_gallery(path, folder_name, do_chapters=True, archive=None):
					"""
					Build a Gallery from a directory or an archive and append it to self.data.

					The source (``archive`` when given, otherwise ``path``) is first scanned as a
					directory; if that raises NotADirectoryError it is handled as an archive.
					Sources for which utils.check_ignore_list is falsy or GalleryDB.check_exists
					is truthy are only recorded in self.skipped_paths.

					path -- gallery location; with ``archive`` set it is stored as the path inside the archive
					folder_name -- name used for logging and title parsing; falls back to ``path``, then to the archive's basename
					do_chapters -- when true, sub-directories/archive files (or archive entries) each become a chapter; otherwise a single chapter is created
					archive -- optional archive path used as the source instead of ``path``

					Returns None; results are delivered through self.data and self.skipped_paths.
					"""
					is_archive = bool(archive)
					temp_p = archive if is_archive else path
					folder_name = folder_name or path or os.path.split(archive)[1]
					if utils.check_ignore_list(temp_p) and not GalleryDB.check_exists(temp_p, self.galleries_from_db, False):
						log_i('Creating gallery: {}'.format(folder_name.encode('utf-8', 'ignore')))
						new_gallery = Gallery()
						metafile = utils.GMetafile()
						try:
							con = scandir.scandir(temp_p) # all of the content in the gallery folder
							log_i('Gallery source is a directory')
							# chapter candidates are sub-directories and archive files
							if do_chapters:
								chapters = sorted([sub.path for sub in con if sub.is_dir() or sub.name.endswith(utils.ARCHIVE_FILES)])
							else:
								chapters = []
							# if gallery has chapters divided into sub folders
							numb_of_chapters = len(chapters)
							if numb_of_chapters != 0:
								log_i('Gallery has {} chapters'.format(numb_of_chapters))
								for ch in chapters:
									chap = new_gallery.chapters.create_chapter()
									chap.title = utils.title_parser(ch)['title']
									chap.path = os.path.join(path, ch)
									chap.pages = len(list(scandir.scandir(chap.path)))
									metafile.update(utils.GMetafile(chap.path))

							else: # else assume that all images are in gallery folder
								chap = new_gallery.chapters.create_chapter()
								chap.title = utils.title_parser(os.path.split(path)[1])['title']
								chap.path = path
								metafile.update(utils.GMetafile(chap.path))
								chap.pages = len(list(scandir.scandir(path)))

							parsed = utils.title_parser(folder_name)
						except NotADirectoryError:
							# The source is not a directory: try to treat it as an archive.
							try:
								if is_archive or temp_p.endswith(utils.ARCHIVE_FILES):
									log_i('Gallery source is an archive')
									contents = utils.check_archive(temp_p)
									if contents:
										new_gallery.is_archive = 1
										new_gallery.path_in_archive = '' if not is_archive else path
										if folder_name.endswith('/'):
											folder_name = folder_name[:-1]
											fn = os.path.split(folder_name)
											# os.path.split() returns a (head, tail) pair, so the only
											# possible fallback is the head; indexing fn[2] raised
											# IndexError whenever the tail was empty.
											folder_name = fn[1] or fn[0]
										folder_name = folder_name.replace('/','')
										if folder_name.endswith(utils.ARCHIVE_FILES):
											n = folder_name
											# NOTE(review): replace() removes the extension text anywhere
											# in the name, not only at the end.
											for ext in utils.ARCHIVE_FILES:
												n = n.replace(ext, '')
											parsed = utils.title_parser(n)
										else:
											parsed = utils.title_parser(folder_name)

										if do_chapters:
											archive_g = sorted(contents)
											if not archive_g:
												log_w('No chapters found for {}'.format(temp_p.encode(errors='ignore')))
												raise ValueError
											for g in archive_g:
												chap = new_gallery.chapters.create_chapter()
												chap.in_archive = 1
												chap.title = utils.title_parser(g)['title']
												chap.path = g
												metafile.update(utils.GMetafile(g, temp_p))
												arch = utils.ArchiveFile(temp_p)
												# always release the archive handle, even if listing fails
												try:
													chap.pages = len(arch.dir_contents(g))
												finally:
													arch.close()
										else:
											chap = new_gallery.chapters.create_chapter()
											chap.title = utils.title_parser(os.path.split(path)[1])['title']
											chap.in_archive = 1
											chap.path = path
											metafile.update(utils.GMetafile(path, temp_p))
											arch = utils.ArchiveFile(temp_p)
											try:
												chap.pages = len(arch.dir_contents(''))
											finally:
												arch.close()
									else:
										raise ValueError
								else:
									raise ValueError
							except ValueError:
								# raised above for non-archives, empty archives and archives without chapters
								log_w('Skipped {} in local search'.format(path.encode(errors='ignore')))
								self.skipped_paths.append((temp_p, 'Empty archive',))
								return
							except app_constants.CreateArchiveFail:
								log_w('Skipped {} in local search'.format(path.encode(errors='ignore')))
								self.skipped_paths.append((temp_p, 'Error creating archive',))
								return

						new_gallery.title = parsed['title']
						new_gallery.path = temp_p
						new_gallery.artist = parsed['artist']
						new_gallery.language = parsed['language']
						new_gallery.info = ""
						# values collected from the metafiles are applied on top of the parsed ones
						metafile.apply_gallery(new_gallery)
						if app_constants.MOVE_IMPORTED_GALLERIES and not app_constants.OVERRIDE_MOVE_IMPORTED_IN_FETCH:
							new_gallery.path = utils.move_files(temp_p)

						self.data.append(new_gallery)
						log_i('Gallery successful created: {}'.format(folder_name.encode('utf-8', 'ignore')))
					else:
						log_i('Gallery already exists: {}'.format(folder_name.encode('utf-8', 'ignore')))
						self.skipped_paths.append((temp_p, 'Already exists'))
#!/usr/bin/env python
"""		
From a file containing paths of files, move them to a new folder.		
"""
import argparse
import logging
logger = logging.getLogger(__name__)
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)

import utils

if __name__ == "__main__":
    # Command-line entry point: read the two positional paths and the
    # optional copy flag, then delegate the actual work to utils.move_files.
    cli = argparse.ArgumentParser()
    cli.add_argument('filepaths', metavar='path_file',
                     help='File containing paths to images')
    cli.add_argument('movefolder', metavar='move_folder',
                     help='Folder where images will be moved')
    cli.add_argument('-c', '--copy', action='store_true', default=False,
                     help='Copy files instead of move them')
    options = cli.parse_args()
    utils.move_files(options.filepaths, options.movefolder, options.copy)
Example #18
0
				def create_gallery(path, folder_name, do_chapters=True, archive=None):
					"""
					Build a Gallery from a directory or an archive and append it to self.data.

					The source (``archive`` when given, otherwise ``path``) is first scanned as a
					directory; if that raises NotADirectoryError it is handled as an archive.
					Sources for which utils.check_ignore_list is falsy or GalleryDB.check_exists
					is truthy are only recorded in self.skipped_paths.

					path -- gallery location; with ``archive`` set it is stored as the path inside the archive
					folder_name -- name used for logging and title parsing; falls back to ``path``, then to the archive's basename
					do_chapters -- when true, sub-directories/archive files (or archive entries) are stored as numbered chapters; otherwise only chapter 0 is set
					archive -- optional archive path used as the source instead of ``path``

					Returns None; results are delivered through self.data and self.skipped_paths.
					"""
					is_archive = bool(archive)
					temp_p = archive if is_archive else path
					folder_name = folder_name or path or os.path.split(archive)[1]
					if utils.check_ignore_list(temp_p) and not GalleryDB.check_exists(temp_p, self.galleries_from_db, False):
						log_i('Creating gallery: {}'.format(folder_name.encode('utf-8', 'ignore')))
						new_gallery = Gallery()
						try:
							con = scandir.scandir(temp_p) # all of the content in the gallery folder
							log_i('Gallery source is a directory')
							# chapter candidates are sub-directories and archive files
							if do_chapters:
								chapters = sorted([sub.path for sub in con if sub.is_dir() or sub.name.endswith(utils.ARCHIVE_FILES)])
							else:
								chapters = []
							# if gallery has chapters divided into sub folders
							if len(chapters) != 0:
								log_i('Gallery has chapters divided in directories')
								for numb, ch in enumerate(chapters):
									chap_path = os.path.join(path, ch)
									new_gallery.chapters[numb] = chap_path

							else: # else assume that all images are in gallery folder
								new_gallery.chapters[0] = path

							##find last edited file
							#times = set()
							#for root, dirs, files in os.walk(path, topdown=False):
							#	for img in files:
							#		fp = os.path.join(root, img)
							#		times.add( os.path.getmtime(fp) )
							#last_updated = time.asctime(time.gmtime(max(times)))
							#new_gallery.last_update = last_updated
							parsed = utils.title_parser(folder_name)
						except NotADirectoryError:
							# The source is not a directory: try to treat it as an archive.
							try:
								if is_archive or temp_p.endswith(utils.ARCHIVE_FILES):
									log_i('Gallery source is an archive')
									contents = utils.check_archive(temp_p)
									if contents:
										new_gallery.is_archive = 1
										new_gallery.path_in_archive = '' if not is_archive else path
										if folder_name.endswith('/'):
											folder_name = folder_name[:-1]
											fn = os.path.split(folder_name)
											# os.path.split() returns a (head, tail) pair, so the only
											# possible fallback is the head; indexing fn[2] raised
											# IndexError whenever the tail was empty.
											folder_name = fn[1] or fn[0]
										folder_name = folder_name.replace('/','')
										if folder_name.endswith(utils.ARCHIVE_FILES):
											n = folder_name
											# NOTE(review): replace() removes the extension text anywhere
											# in the name, not only at the end.
											for ext in utils.ARCHIVE_FILES:
												n = n.replace(ext, '')
											parsed = utils.title_parser(n)
										else:
											parsed = utils.title_parser(folder_name)
										if do_chapters:
											archive_g = sorted(contents)
											if not archive_g:
												log_w('No chapters found for {}'.format(temp_p.encode(errors='ignore')))
												raise ValueError
											for n, g in enumerate(archive_g):
												new_gallery.chapters[n] = g
										else:
											new_gallery.chapters[0] = path
									else:
										raise ValueError
								else:
									raise ValueError
							except ValueError:
								# raised above for non-archives, empty archives and archives without chapters
								log_w('Skipped {} in local search'.format(path.encode(errors='ignore')))
								self.skipped_paths.append(temp_p)
								return

						new_gallery.title = parsed['title']
						new_gallery.path = temp_p
						new_gallery.artist = parsed['artist']
						new_gallery.language = parsed['language']
						new_gallery.info = "No description.."
						if gui_constants.MOVE_IMPORTED_GALLERIES and not gui_constants.OVERRIDE_MOVE_IMPORTED_IN_FETCH:
							new_gallery.path = utils.move_files(temp_p)

						self.data.append(new_gallery)
						log_i('Gallery successful created: {}'.format(folder_name.encode('utf-8', 'ignore')))
					else:
						log_i('Gallery already exists: {}'.format(folder_name.encode('utf-8', 'ignore')))
						self.skipped_paths.append(temp_p)
Example #19
0
                def create_gallery(path,
                                   folder_name,
                                   do_chapters=True,
                                   archive=None):
                    """
                    Build a Gallery from a directory or an archive and append it to self.data.

                    The source (``archive`` when given, otherwise ``path``) is first
                    scanned as a directory; if that raises NotADirectoryError it is
                    handled as an archive. Sources for which utils.check_ignore_list
                    is falsy or GalleryDB.check_exists is truthy are only recorded in
                    self.skipped_paths.

                    path -- gallery location; with ``archive`` set it is stored as the
                        path inside the archive
                    folder_name -- name used for logging and title parsing; falls back
                        to ``path``, then to the archive's basename
                    do_chapters -- when true, sub-directories/archive files (or archive
                        entries) each become a chapter; otherwise a single chapter is
                        created
                    archive -- optional archive path used as the source instead of
                        ``path``

                    Returns None; results are delivered through self.data and
                    self.skipped_paths.
                    """
                    is_archive = bool(archive)
                    temp_p = archive if is_archive else path
                    folder_name = (folder_name or path
                                   or os.path.split(archive)[1])
                    if utils.check_ignore_list(
                            temp_p) and not GalleryDB.check_exists(
                                temp_p, self.galleries_from_db, False):
                        log_i('Creating gallery: {}'.format(
                            folder_name.encode('utf-8', 'ignore')))
                        new_gallery = Gallery()
                        metafile = utils.GMetafile()
                        try:
                            con = scandir.scandir(
                                temp_p)  # all of the content in the gallery folder
                            log_i('Gallery source is a directory')
                            # chapter candidates are sub-directories and archive files
                            if do_chapters:
                                chapters = sorted([
                                    sub.path for sub in con if sub.is_dir()
                                    or sub.name.endswith(utils.ARCHIVE_FILES)
                                ])
                            else:
                                chapters = []
                            # if gallery has chapters divided into sub folders
                            numb_of_chapters = len(chapters)
                            if numb_of_chapters != 0:
                                log_i('Gallery has {} chapters'.format(
                                    numb_of_chapters))
                                for ch in chapters:
                                    chap = new_gallery.chapters.create_chapter(
                                    )
                                    chap.title = utils.title_parser(
                                        ch)['title']
                                    chap.path = os.path.join(path, ch)
                                    chap.pages = len(
                                        list(scandir.scandir(chap.path)))
                                    metafile.update(utils.GMetafile(chap.path))

                            else:  # else assume that all images are in gallery folder
                                chap = new_gallery.chapters.create_chapter()
                                chap.title = utils.title_parser(
                                    os.path.split(path)[1])['title']
                                chap.path = path
                                metafile.update(utils.GMetafile(chap.path))
                                chap.pages = len(list(scandir.scandir(path)))

                            parsed = utils.title_parser(folder_name)
                        except NotADirectoryError:
                            # The source is not a directory: try to treat it as
                            # an archive.
                            try:
                                if is_archive or temp_p.endswith(
                                        utils.ARCHIVE_FILES):
                                    log_i('Gallery source is an archive')
                                    contents = utils.check_archive(temp_p)
                                    if contents:
                                        new_gallery.is_archive = 1
                                        new_gallery.path_in_archive = '' if not is_archive else path
                                        if folder_name.endswith('/'):
                                            folder_name = folder_name[:-1]
                                            fn = os.path.split(folder_name)
                                            # os.path.split() returns a
                                            # (head, tail) pair, so the only
                                            # possible fallback is the head;
                                            # indexing fn[2] raised IndexError
                                            # whenever the tail was empty.
                                            folder_name = fn[1] or fn[0]
                                        folder_name = folder_name.replace(
                                            '/', '')
                                        if folder_name.endswith(
                                                utils.ARCHIVE_FILES):
                                            n = folder_name
                                            # NOTE(review): replace() removes
                                            # the extension text anywhere in
                                            # the name, not only at the end.
                                            for ext in utils.ARCHIVE_FILES:
                                                n = n.replace(ext, '')
                                            parsed = utils.title_parser(n)
                                        else:
                                            parsed = utils.title_parser(
                                                folder_name)

                                        if do_chapters:
                                            archive_g = sorted(contents)
                                            if not archive_g:
                                                log_w(
                                                    'No chapters found for {}'.
                                                    format(
                                                        temp_p.encode(
                                                            errors='ignore')))
                                                raise ValueError
                                            for g in archive_g:
                                                chap = new_gallery.chapters.create_chapter(
                                                )
                                                chap.in_archive = 1
                                                chap.title = utils.title_parser(
                                                    g)['title']
                                                chap.path = g
                                                metafile.update(
                                                    utils.GMetafile(g, temp_p))
                                                arch = utils.ArchiveFile(
                                                    temp_p)
                                                # always release the archive
                                                # handle, even if listing fails
                                                try:
                                                    chap.pages = len(
                                                        arch.dir_contents(g))
                                                finally:
                                                    arch.close()
                                        else:
                                            chap = new_gallery.chapters.create_chapter(
                                            )
                                            chap.title = utils.title_parser(
                                                os.path.split(path)
                                                [1])['title']
                                            chap.in_archive = 1
                                            chap.path = path
                                            metafile.update(
                                                utils.GMetafile(path, temp_p))
                                            arch = utils.ArchiveFile(temp_p)
                                            try:
                                                chap.pages = len(
                                                    arch.dir_contents(''))
                                            finally:
                                                arch.close()
                                    else:
                                        raise ValueError
                                else:
                                    raise ValueError
                            except ValueError:
                                # raised above for non-archives, empty archives
                                # and archives without chapters
                                log_w('Skipped {} in local search'.format(
                                    path.encode(errors='ignore')))
                                self.skipped_paths.append((
                                    temp_p,
                                    'Empty archive',
                                ))
                                return
                            except app_constants.CreateArchiveFail:
                                log_w('Skipped {} in local search'.format(
                                    path.encode(errors='ignore')))
                                self.skipped_paths.append((
                                    temp_p,
                                    'Error creating archive',
                                ))
                                return

                        new_gallery.title = parsed['title']
                        new_gallery.path = temp_p
                        new_gallery.artist = parsed['artist']
                        new_gallery.language = parsed['language']
                        new_gallery.info = ""
                        # values collected from the metafiles are applied on
                        # top of the parsed ones
                        metafile.apply_gallery(new_gallery)
                        if app_constants.MOVE_IMPORTED_GALLERIES and not app_constants.OVERRIDE_MOVE_IMPORTED_IN_FETCH:
                            new_gallery.path = utils.move_files(temp_p)

                        self.data.append(new_gallery)
                        log_i('Gallery successful created: {}'.format(
                            folder_name.encode('utf-8', 'ignore')))
                    else:
                        log_i('Gallery already exists: {}'.format(
                            folder_name.encode('utf-8', 'ignore')))
                        self.skipped_paths.append((temp_p, 'Already exists'))
Example #20
0
    parser.add_argument('-o',
                        '--output',
                        default='./error',
                        help='Path to a folder to save images with error.')
    parser.add_argument('-m',
                        '--move',
                        action='store_true',
                        help='Move files in case of output is a folder.')
    args = parser.parse_args()

    input = args.finput
    if isdir(input):
        input = input + '/'
        dirin = dirname(realpath(input))
        inputfile = join(dirin, 'paths.txt')
        create_paths(input, inputfile)
        input = inputfile

    output = args.output
    if not isdir(output):
        output = join(dirname(input), output)
        os.mkdir(output)
    outputfile = join(dirname(input), 'error.txt')

    verify_errors(input, outputfile)

    if args.move:
        move_files(outputfile, output, copy_files=False)
    else:
        move_files(outputfile, output, copy_files=True)