import logging
import os
import uuid

# Assumption: parse() comes from python-dateutil, matching the parse(f['modified']) calls below.
from dateutil.parser import parse

logger = logging.getLogger(__name__)

# load_config, get_pickle_data, save_pickle_data, scanFiles, make_hash and
# process_visiofile are defined elsewhere in this project.


def main():
    config_path = 'visio_settings.json'
    pickle_file = 'visio_data.pickle'
    logger.debug('Config: {}'.format(config_path))
    logger.debug('Pickle: {}'.format(pickle_file))

    cnf_data = load_config(config_path)
    logger.debug(cnf_data)

    path = cnf_data['folders']
    filepath = os.path.join(*path)
    filepath = os.path.expanduser(filepath)
    logger.info(filepath)

    if not os.path.exists(filepath):
        logger.critical('Missing Path {}'.format(filepath))
        # raising a plain string is a TypeError in Python 3; raise a real exception instead
        raise FileNotFoundError('missing path: {}'.format(filepath))

    data = get_pickle_data(pickle_file)
    if data == {}:
        data['folders'] = path
        data['filter'] = cnf_data['filter']
        data['files'] = []
        data['hashes'] = {}

    for f in scanFiles(filepath):
        filename, file_extension = os.path.splitext(f['file'])
        if file_extension == cnf_data['filter']:
            logger.info('Scanning filename: {}{}'.format(filename, file_extension))
            f_path = os.path.normpath(os.path.join(f['folder'], f['file']))
            file_hash = make_hash(f_path)
            file_hash_sha = file_hash['SHA1']

            if file_hash_sha not in data['hashes']:
                data['hashes'][file_hash_sha] = []
            if f_path not in data['hashes'][file_hash_sha]:
                logger.info('\tadding hash {}'.format(file_hash_sha))
                data['hashes'][file_hash_sha].append(f_path)

            scan_file = True
            scan_idx = -1
            for s in data['files']:
                scan_idx += 1
                s_path = os.path.normpath(os.path.join(s['folder'], s['file']))
                if f_path == s_path:
                    if s['hash']['SHA1'] == file_hash_sha:
                        # file has been scanned and has not changed by hash
                        logger.info('\tfile already scanned!')
                        scan_file = False
                    else:
                        # use the module logger consistently (was logging.info)
                        logger.info('\tFile is out of date, updating!')
                        if 'archives' not in data:
                            data['archives'] = []
                        data['archives'].append(s)
                        del data['files'][scan_idx]
                    # we found the file... so no need to search any more
                    break

            if not scan_file:
                continue

            dwg_file = process_visiofile(f_path)

            # add the file details...
            dwg_file['folder'] = f['folder']
            dwg_file['file'] = f['file']
            dwg_file['modified'] = parse(f['modified'])
            dwg_file['accessed'] = parse(f['accessed'])
            dwg_file['size'] = f['size']
            dwg_file['hash'] = file_hash
            dwg_file['GUID'] = uuid.uuid4()

            data['files'].append(dwg_file)

    save_pickle_data(data, pickle_file)
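# A minimal sketch of the config that load_config() is expected to return,
# based only on the keys read in main() above: 'folders' (path segments fed to
# os.path.join()/expanduser()) and 'filter' (the extension to scan for). The
# segment names and the '.vsdx' extension below are illustrative assumptions,
# not values taken from this project.
example_cnf_data = {
    'folders': ['~', 'Documents', 'Visio'],
    'filter': '.vsdx',
}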
from getFiles import scanFiles, get_info, get_pickle_data, save_pickle_data
import os

# Walk the current folder, report the contents of any pickle found, and then
# list the .zip entries recorded in its 'files' section.
for f in scanFiles('./'):
    filename, file_extension = os.path.splitext(f['file'])
    if file_extension == '.pickle':
        print(f, filename, file_extension)
        data = get_pickle_data(f['file'])
        for item in data:
            print(item, len(data[item]))

        step = 1
        interval = len(data['files']) // 10
        # note: the inner loop reuses the name 'f' from the outer loop
        for f in data['files']:
            step += 1
            # if step % interval == 0:
            # if 'thunderbird' in f['folder'].lower():
            if '.zip' in f['file'].lower():
                print(f)
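# For reference, the dicts yielded by scanFiles() are read with these keys in
# the scripts above ('file', 'folder', 'modified', 'accessed', 'size'). The
# values shown are illustrative assumptions only.
example_scan_entry = {
    'file': 'archive.zip',
    'folder': r'C:\Users\me\Downloads',
    'modified': '2019-01-01 12:00:00',
    'accessed': '2019-01-02 08:30:00',
    'size': 1024,
}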
# Fragment: tail of the metadata helper that flattens the record-type rows.
        rt_row['name'] = rt['name']
        rt_row['recordTypeId'] = rt['recordTypeId']
        rt_rows.append(rt_row)

    data['record_type'] = rt_rows
    return data


# Requires `from datetime import datetime`; get_config, get_pickle_data,
# connect_sf, get_metadata and get_object_metadata are defined earlier in
# this module.
config_path = r'sf.secrets.json'
instance = 'KaptioStaging'
config_data = get_config(config_path, instance)

pickle_file = 'salesforce_data.pickle'
data = get_pickle_data(pickle_file)
if instance not in data:
    data[instance] = {}

sf = connect_sf(config_data)

# save the runtime details
data[instance] = get_metadata(sf)

for obj_name in data[instance]:
    row = data[instance][obj_name]
    result = get_object_metadata(sf, row['name'])
    data[instance][obj_name] = {
        **row,
        **result,
        'scanned_date': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
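# connect_sf() is not shown in this fragment. A minimal sketch, assuming it
# wraps the simple_salesforce library and that get_config() returns a dict
# with 'username', 'password', 'token' and 'sandbox' keys -- the key names
# and the sandbox handling are assumptions, not taken from this project.
from simple_salesforce import Salesforce


def connect_sf_sketch(config_data):
    return Salesforce(
        username=config_data['username'],
        password=config_data['password'],
        security_token=config_data['token'],
        domain='test' if config_data.get('sandbox') else 'login',
    )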
from getFiles import scanFiles, get_info, get_pickle_data, save_pickle_data
from collections import defaultdict
import os
import logging
from logging.config import fileConfig

fileConfig('logging_config.ini')
logger = logging.getLogger(__name__)

files_found = 0
picklename = "{}.pickle".format("scan_files")

data = get_pickle_data(picklename=picklename)
if not data.get("extension"):
    data_ext = defaultdict(list)
    data_files = defaultdict(list)

    for f in scanFiles('c:\\'):
        files_found += 1
        filename, file_extension = os.path.splitext(f['file'])
        data_files[f['file']].append(f)
        data_ext[file_extension].append(f)

    logger.info("files scanned:{}".format(files_found))

    data = {}
    data['extension'] = data_ext
    data['files'] = data_files

    save_pickle_data(data=data, picklename=picklename)
    logger.info("Data Saved:{}".format(picklename))
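# Example follow-up (a sketch): reload the pickle written above and log the
# ten most common extensions. It relies only on the 'extension' mapping built
# by this script.
summary = get_pickle_data(picklename=picklename)
by_count = sorted(summary['extension'].items(), key=lambda kv: len(kv[1]), reverse=True)
for ext, entries in by_count[:10]:
    logger.info("{}: {} files".format(ext or '<no extension>', len(entries)))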