def __init__(self, filename):
    """Load the cached Markov chain for ``filename``, or build and cache it.

    The cache path is ``filename`` with its final extension replaced by
    ``.pkl``.  When ``self.loadfile`` raises ``OSError`` for that path, the
    text file is read, split on single spaces (line breaks count as
    spaces), and a word-to-word transition matrix is counted, normalised
    per row and written out through ``self.savefile``.

    Sets ``self.bag_of_word`` (word -> index), ``self.bag_of_index``
    (index -> word) and ``self.markov_matrix`` (NumPy array).

    Args:
        filename: Path of the text corpus.

    Raises:
        SystemExit: If an ``OSError`` occurs while reading the corpus or
            saving the cache; "file not found !" is printed first.
    """
    import os  # local import: the file-level import block is not visible here

    # splitext removes only the last extension.  Splitting on the first '.'
    # mangled paths such as "./corpus.txt" (-> ".pkl") and
    # "my.corpus.txt" (-> "my.pkl").
    dfile = os.path.splitext(filename)[0] + '.pkl'
    try:
        dump = self.loadfile(dfile)
        self.bag_of_word = dump['bag_of_word']
        self.bag_of_index = dump['bag_of_index']
        self.markov_matrix = np.array(dump['markov_matrix'])
    except OSError:
        # No usable cache: start from an empty vocabulary and a 1x1 matrix.
        self.bag_of_word = {}
        self.bag_of_index = {}
        self.markov_matrix = np.array([[0]])
        index = 0  # next unused word index
        try:
            with open(filename, 'r') as file:
                print(
                    "log: this would show up if its a new file. A markov chain will be created and saved"
                )
                low = (' '.join(file.read().splitlines())).split(' ')
                # One step per adjacent word pair (low[i], low[i + 1]).
                progress = ProgressBar(len(low) - 1, fmt=ProgressBar.FULL)
                for i in range(progress.total):
                    progress.current += 1
                    progress()
                    for word in (low[i], low[i + 1]):
                        # setdefault returns `index` only when `word` was
                        # just inserted; known words hold a smaller index.
                        if self.bag_of_word.setdefault(word, index) == index:
                            self.bag_of_index[index] = word
                            # NOTE(review): self.max is defined elsewhere;
                            # presumably it yields the number of rows and
                            # columns to append for this index - confirm.
                            self.markov_matrix = np.pad(
                                self.markov_matrix,
                                [(0, self.max(index)),
                                 (0, self.max(index))],
                                mode='constant')
                            index += 1
                    # Count the transition low[i] -> low[i + 1].
                    self.markov_matrix[self.bag_of_word[low[i]],
                                       self.bag_of_word[low[i + 1]]] += 1
                progress.done()
            # Divide each row by its sum; all-zero rows are divided by 1 so
            # they stay zero instead of producing NaN.
            s = np.sum(self.markov_matrix, axis=1)[:, np.newaxis]
            s[s == 0] = 1
            self.markov_matrix = self.markov_matrix / s
            # Set the entry (last row, column 0) to 1.
            self.markov_matrix[self.markov_matrix.shape[0] - 1][0] = 1
            dump = {
                'bag_of_word': self.bag_of_word,
                'bag_of_index': self.bag_of_index,
                'markov_matrix': self.markov_matrix,
            }
            self.savefile(dump, dfile)
            del dump
            print("log:chain for ", filename, " is created")
        except OSError:
            print("file not found !")
            # Same effect as exit(), without relying on the helper that the
            # `site` module injects for interactive use.
            raise SystemExit
Ejemplo n.º 2
0
def download_images(uri_list, download_location, retain_original_naming=True):
    """Download every URI in ``uri_list`` into ``download_location``.

    Each URI is fetched with ``urlopen``; a non-empty payload is written to
    a file in ``download_location``.  Any exception raised while fetching
    or writing one URI is passed to ``log_error`` and the loop moves on to
    the next URI.

    Args:
        uri_list: Sequence of URIs to fetch.
        download_location: Directory the files are written to.
        retain_original_naming: When true, the file keeps the base name of
            the URI path.  Otherwise it is named ``<position>.<ext>``, where
            position is the 1-based index in ``uri_list`` and ``<ext>`` is
            the text after the last ``.`` of the original base name.
    """
    from contextlib import closing  # local import keeps the block self-contained

    num = len(uri_list)
    progress = ProgressBar(num, fmt=ProgressBar.FULL)
    for i, src in enumerate(uri_list):
        try:
            # closing() guarantees the response is released even if read()
            # fails; previously the connection was never closed explicitly.
            with closing(urlopen(src)) as response:
                img_data = response.read()
            if len(img_data) > 0:  # read succeeded
                # Index 2 of the urlsplit result is the path component.
                filename = basename(urlsplit(src)[2]).strip()
                if not retain_original_naming:
                    filetype = filename.split('.')[-1]
                    filename = str(i + 1) + '.' + filetype
                # `with` closes the handle even when write() raises.
                with open(os.path.join(download_location, filename),
                          'wb') as output:
                    output.write(img_data)
        except Exception as e:
            log_error(e)
        progress.current += 1
        progress()
        sleep(0.001)
    progress.done()
Ejemplo n.º 3
0
def main(argv):
    """Process the files under an input directory into an output directory.

    ``argv[0]`` is the input directory and ``argv[1]`` the output
    directory; options are parsed from ``argv[2:]``:

    * ``-d/--date FORMAT``  directory format, passed to ``parse_date_format``
    * ``-r/--regex REGEX``  regular expression forwarded to the file handler
    * ``-l/--log FILE``     log at DEBUG level into FILE
    * ``-m/--move``         use the move strategy
    * ``-s/--sha-rename``   process files with ``handle_file2`` instead of
      ``handle_file``
    * ``-h/--help``         call ``help_info``

    Every file found by walking the input directory (except ``.DS_Store``
    and ``Thumbs.db``) is passed to the handler; the returned status is
    tallied and a summary is printed at the end.

    Args:
        argv: Command-line arguments without the program name.
    """
    check_dependencies()

    move_files = False
    date_regex = None
    log_fname = None
    sha_rename = False
    dir_format = os.path.sep.join(['%Y', '%m', '%d'])

    # With fewer than two arguments argv[2:] is always empty, so checking
    # this before option parsing gives the same result as checking after.
    if len(argv) < 2:
        print('ERROR: Number of arguments are less than 2')
        print(argv)
        help_info()
        # help_info() is expected to terminate the program; returning here
        # avoids an IndexError on argv[0] if it ever does not.
        return

    try:
        opts, args = getopt.getopt(
            argv[2:], "d:r:l:mhs",
            ["date=", "regex=", "log=", "move", "help", "sha-rename"])
    except getopt.GetoptError as e:
        print(e)
        help_info()
        # Without this, `opts` would be unbound if help_info() returned.
        return

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print('Printing help:')
            help_info()

        if opt in ("-d", "--date"):
            if not arg:
                print('Date format cannot be empty')
                sys.exit(0)
            dir_format = parse_date_format(arg)

        if opt in ("-l", "--log"):
            if not arg:
                print("log file name can't be empty")
                sys.exit(0)
            log_fname = arg

        if opt in ("-m", "--move"):
            move_files = True
            print('Using move strategy!')

        if opt in ("-s", "--sha-rename"):
            sha_rename = True
            print('Renaming files to SHA256 name!')

        if opt in ("-r", "--regex"):
            try:
                date_regex = re.compile(arg)
            except Exception:
                # A bare `except:` would also swallow KeyboardInterrupt and
                # SystemExit; only real compile failures are reported.
                error("Provided regex is invalid!")

    inputdir = os.path.expanduser(argv[0])
    outputdir = os.path.expanduser(argv[1])

    # isdir() is already False for a missing path, so no exists() check.
    if not os.path.isdir(inputdir):
        error('Input directory "%s" does not exist or cannot be accessed' % inputdir)
    if not os.path.exists(outputdir):
        print('Output directory "%s" does not exist, creating now' % outputdir)
        try:
            os.makedirs(outputdir)
        except Exception:
            print('Cannot create output directory. No write access!')
            sys.exit(0)

    if log_fname:
        logging.basicConfig(filename=log_fname, level='DEBUG')
    else:
        logging.basicConfig(level='CRITICAL')

    ignored_files = ('.DS_Store', 'Thumbs.db')
    error_list = list()
    count = dict(copied=0, moved=0, duplicate=0, error=0, other=0)

    # Ignored files never reach bar.increment(), so they are left out of the
    # total; otherwise the bar could not reach 100 %.
    total_count = sum(
        1
        for _, _, files in os.walk(inputdir)
        for name in files
        if name not in ignored_files
    )
    bar = ProgressBar(total_count, count.keys())

    # Both handlers are called with the same arguments.
    handler = handle_file2 if sha_rename else handle_file

    for root, _, files in os.walk(inputdir):
        for filename in files:
            try:
                if filename in ignored_files:
                    continue
                status = handler(os.path.join(root, filename), outputdir,
                                 dir_format, move_files, date_regex)
                count[status] += 1
                bar.increment(status)
            except KeyboardInterrupt:
                print('\n Exiting...')
                print_summary(count)
                sys.exit(0)
            except Exception as e:
                # Best effort: record the failure and continue with the
                # remaining files.
                logging.error('Error skipping %s:%s', filename, repr(e))
                error_list.append(os.path.join(root, filename))
                count['error'] += 1
                bar.increment('error')
    bar.done()
    logging.info('===Files with Errors===')
    for fn in error_list:
        logging.info(fn)
    logging.info('===End Files with Errors===')
    print_summary(count)