def main(): """ The main routine which kicks everything off :return: """ # Setup the command line arguments flags = argparse.ArgumentParser(description="Tool to validate and fix errors in CSV files for TADC imports") flags.add_argument('csv_file', type=str, help="Path to a CSV file to validate") flags.add_argument('header_rows', type=str, help="Number of header rows") flags.add_argument('--fix-missing', '-f', action='store_true', help="Fix missing fields by inserting the value 'unknown'") flags.add_argument('--output-dir', '-o', type=str, help='Where to put output files', default=os.getcwd()) flags.add_argument('--log-dir', '-l', type=str, help='Where to put log files', default='/tmp') flags.add_argument('--log-level', type=str, help='Choose a log level', default='INFO') flags.add_argument('--old-date-format', type=str, help="the format of dates that will be fixed", default='%d/%m/%Y') args = flags.parse_args() log_filename = os.path.join( args.log_dir, 'tadc_import_validator_{}.log'.format(os.path.basename(time.strftime('%Y%m%d-%H%M%S'))) ) # register some logging handlers log_handler = FileHandler( log_filename, mode='w', level=args.log_level, bubble=True ) stdout_handler = StreamHandler(sys.stdout, level=args.log_level, bubble=True) with stdout_handler.applicationbound(): with log_handler.applicationbound(): log.info("Arguments: {}".format(args)) start = time.time() log.info("starting at {}".format(time.strftime('%l:%M%p %Z on %b %d, %Y'))) with CSVFileValidator( csv_file=args.csv_file, header_rows=args.header_rows, output_dir=args.output_dir, old_date_format=args.old_date_format, fix_missing=args.fix_missing) as validator: validator.validate_file() log.info("Running time: {}".format(str(datetime.timedelta(seconds=(round(time.time() - start, 3)))))) log.info("Log written to {}:".format(log_filename)) log.info("Fixed data is in: {}".format(validator.get_fixed_filename()))
def main(docs, bundle, build_dir, tmp_dir, recreate, debug, w):
    '''
    annotatedocs analyzes your sphinx-based documentation and provides helpful feedback about the quality and possible improvements.

    The first argument should be the path to where your documentation lives (e.g. the one in which you usually call 'make html').

    If you leave the first argument empty it defaults to the current working directory.

    The build will usually be written to <your docs dir>/_build/annotatedhtml/. You can change the output directory with the -b option.
    '''
    # Ignore SIG_PIPE so that piping works correctly.
    signal(SIGPIPE, SIG_DFL)

    # Verbose logging only when the debug flag is set.
    log_level = 'DEBUG' if debug else 'INFO'

    null_handler = NullHandler(level='DEBUG')
    log_handler = StreamHandler(sys.stderr, level=log_level)

    with null_handler.applicationbound():
        with log_handler.applicationbound():
            loader = get_loader(docs, build_dir=build_dir, tmp_dir=tmp_dir)

            # A recreate run wipes any previous build artifacts first.
            if recreate:
                loader.cleanup()
            loader.setup()

            confoverrides = {}
            if bundle:
                confoverrides['annotatedocs_bundle'] = bundle

            index_file = loader.build(confoverrides=confoverrides)

            # Open the generated index page in the default browser on request.
            if w:
                webbrowser.open(index_file)
def run():
    """Look up a spider by name (taken from argv), instantiate the matching
    spider class and run it.

    Any exception is reported to Sentry (raven) and logged; the MongoDB
    client is always closed.
    """
    raven_client = Client()
    logger = Logger("ardegra")
    spider_name = " ".join(sys.argv[1:])
    client = pymongo.MongoClient("mongodb://{}/ardegra".format(
        Config.DATABASE_ADDRESS))
    logger.debug("Running: {}".format(spider_name))
    try:
        db = client["ardegra"]
        document = db.spiders.find_one({"name": spider_name})
        if document is None:
            # FIX: find_one returns None when no spider matches; previously
            # this crashed with a cryptic TypeError on the subscript below.
            raise ValueError("Unknown spider: {}".format(spider_name))
        type_name = document["type"]["name"]
        if type_name == "Forum Spider 1":
            spider = ForumSpider1(name=spider_name)
        elif type_name == "News Spider 1":
            spider = NewsSpider1(name=spider_name)
        elif type_name == "News Spider 2":
            spider = NewsSpider2(name=spider_name)
        else:
            # FIX: previously an unrecognized type fell through and raised
            # UnboundLocalError at spider.run(); raise a descriptive error.
            raise ValueError("Unknown spider type: {}".format(type_name))
        spider.run()
    except Exception as err:
        raven_client.captureException()
        logger.error(str(err))
    finally:
        client.close()


if __name__ == "__main__":
    handler = StreamHandler(sys.stdout)
    with handler.applicationbound():
        run()
def main(): """printing argument we got to test""" #print_arguments(args=args) file_count = 0 page_count = 0 upload_count = 0 log_handler= setup_logging() stdout_handler = StreamHandler(sys.stdout, level=args.loglevel, bubble=True) with stdout_handler.applicationbound(): with log_handler.applicationbound(): log.info("MM Python log file test") log.info("Arguments: {}".format(args)) start = time.time() log.info("starting at {}".format(time.strftime('%I:%M%p %Z on %b %d, %Y')),start) """get credentials and build authorized google drive service""" http = get_authorized_google_http() gdrive_service = discovery.build('drive', 'v3', http=http) #create s3 client for upload s3client = create_aws_client() # load up a match file if we have one. """getting files in the selected folder passed in the argument""" query = gdrive_service.files().list( pageSize=args.pagesize,q="'{}' in parents".format(args.folder_id),fields="nextPageToken, files(id, name)") #get all the files and read byte while query is not None: try: result_gfiles = query.execute(http=http) page_count += 1 print("\nwhile loop query, page number:" + str(page_count)) #printing files in retrieived list to test #print_allFiles(result=result_gfiles) # determine the page at which to start processing as per user input if page_count >= args.startpage: log.info(u"######## Page {} ########".format(page_count)) for this_file in result_gfiles.get('files', []): print('\n Number of file processed: ' + str(file_count)) log.info('Current file ID and name under process: ' +this_file['id'] + ',' + this_file['name']) file_count+=1 if args.matchfile != None: log.info('Match file parameter passed by user:'******'name'], args.matchfile): log.info('match found:' + this_file['name']) # download from gdrive tempbyte = download_from_Gdrive(gdrive_service, this_file) # upload to s3 val=upload_to_s3(args.bucket, args.keyprefix, tempbyte, s3client, this_file) if val==True: log.info('Upload complete for file:'+ this_file['name']) upload_count 
+= 1 break else: log.info('File not macthed, file from gdrive: ' + this_file['name'] + ', matching file name by user( '+ args.matchfile + ') not found on page-' + str(page_count)) else: log.info('Match file parameter not passed by user,uploading all file on this page:'+ str(page_count) ) #download from gdrive tempbyte = download_from_Gdrive(gdrive_service,this_file) #upload to s3 val= upload_to_s3(args.bucket,args.keyprefix,tempbyte,s3client,this_file) if val == True: log.info('Upload complete for file:' + this_file['name']) upload_count += 1 # stop if we have come to the last user specified page if args.endpage and page_counter == args.endpage: log.info(u"User defined engapage reached, finished paging at page {}".format(page_counter)) readbyte.close() log.info("Running time: {}".format(str(datetime.timedelta(seconds=(round(time.time() - start, 3)))))) log.info('Total files processed:' + str(file_count)) log.info('Total files uploaded: '+ str(upload_count)) break page_token = result_gfiles.get('nextPageToken') if not page_token: print('\nLast page token') log.info('Reached the end of pagination, All files traversed') readbyte.close() log.info("Running time: {}".format(str(datetime.timedelta(seconds=(round(time.time() - start, 3)))))) log.info('Total files processed:' + str(file_count)) log.info('Total files uploaded: '+ str(upload_count)) break except (Exception) as e: log.error ('An error occurred: '+ str(e) ) readbyte.close() log.error(e) break print('\n program ended') readbyte.close()
    # Tail of prepare() -- the rest of its definition starts outside this view.
    # Returns the parsed arguments, a model state dict to restore, and the
    # resume flag consumed by the __main__ block below.
    return args, state_dict, resume


if __name__ == '__main__':
    args, state_dict, resume = prepare()
    # redirect stdout and stderr to log file
    # redirection = open(log_name, 'a', buffering=1)
    # sys.stdout = redirection
    # sys.stderr = redirection
    stdout_handler = StreamHandler(sys.stdout, bubble=True)
    # stderr handler only surfaces WARNING and above.
    stderr_handler = StderrHandler(level=WARNING)
    # write logs to log.MODEL file
    # file_handler = FileHandler(log_name, bubble=True)
    # file_handler.format_string = '{record.message},{record.extra[cwd]}'
    # file_handler.format_string = '[{record.time:%Y-%m-%d %H:%M:%S.%f%z}] {record.level_name}: {record.message}'
    # with file_handler.applicationbound():
    stdout_handler.format_string = '[{record.time:%Y-%m-%d %H:%M:%S.%f%z}] ' \
                                   '{record.level_name}: {record.message}'
    with stdout_handler.applicationbound():
        if resume:
            logger.info(
                f'Resume training from checkpoint: {Loader.get_latest(args.model)[1]}'
            )
        try:
            main(args)
        except Exception as e:
            # Log the full traceback; the bound exception object itself is unused.
            logger.error(f'\n{traceback.format_exc()}')
def main(): """Shows basic usage of the Google Drive API. Creates a Google Drive API service object and outputs the names and IDs for up to 10 files. """ log_filename = os.path.join( args.log_dir, 'google-drive-to-s3-{}.log'.format(os.path.basename(time.strftime('%Y%m%d-%H%M%S'))) ) # register some logging handlers log_handler = FileHandler( log_filename, mode='w', level=args.log_level, bubble=True ) stdout_handler = StreamHandler(sys.stdout, level=args.log_level, bubble=True) with stdout_handler.applicationbound(): with log_handler.applicationbound(): log.info("Arguments: {}".format(args)) start = time.time() log.info("starting at {}".format(time.strftime('%l:%M%p %Z on %b %d, %Y'))) credentials = get_credentials() http = credentials.authorize(httplib2.Http()) drive_service = discovery.build('drive', 'v3', http=http) s3 = boto3.resource('s3') # load up a match file if we have one. if args.match_file: with open(args.match_file, 'r') as f: match_filenames = f.read().splitlines() else: match_filenames = None # get the files in the specified folder. files = drive_service.files() request = files.list( pageSize=args.page_size, q="'{}' in parents".format(args.folder_id), fields="nextPageToken, files(id, name)" ) # make sure our S3 Key prefix has a trailing slash key_prefix = ensure_trailing_slash(args.key_prefix) page_counter = 0 file_counter = 0 while request is not None: file_page = request.execute(http=http) page_counter += 1 page_file_counter = 0 # reset the paging file counter # determine the page at which to start processing. if page_counter >= args.start_page: log.info(u"######## Page {} ########".format(page_counter)) for this_file in file_page['files']: file_counter += 1 page_file_counter += 1 if we_should_process_this_file(this_file['name'], match_filenames): log.info(u"#== Processing {} file number {} on page {}. 
{} files processed.".format( this_file['name'], page_file_counter, page_counter, file_counter )) # download the file download_request = drive_service.files().get_media(fileId=this_file['id']) fh = io.BytesIO() # Using an in memory stream location downloader = MediaIoBaseDownload(fh, download_request) done = False pbar = InitBar(this_file['name']) while done is False: status, done = downloader.next_chunk() pbar(int(status.progress()*100)) # print("\rDownload {}%".format(int(status.progress() * 100))) del pbar # upload to bucket log.info(u"Uploading to S3") s3.Bucket(args.bucket).put_object( Key="{}{}".format(key_prefix, this_file['name']), Body=fh.getvalue(), ACL='public-read' ) log.info(u"Uploaded to S3") fh.close() # close the file handle to release memory else: log.info(u"Do not need to process {}".format(this_file['name'])) # stop if we have come to the last user specified page if args.end_page and page_counter == args.end_page: log.info(u"Finished paging at page {}".format(page_counter)) break # request the next page of files request = files.list_next(request, file_page) log.info("Running time: {}".format(str(datetime.timedelta(seconds=(round(time.time() - start, 3)))))) log.info("Log written to {}:".format(log_filename))
def main(): """ Copy a folder from Source to Target """ log_filename = os.path.join( args.log_dir, 'copy-google-drive-folder-{}.log'.format( os.path.basename(time.strftime('%Y%m%d-%H%M%S')))) # register some logging handlers log_handler = FileHandler(log_filename, mode='w', level=args.log_level, bubble=True) stdout_handler = StreamHandler(sys.stdout, level=args.log_level, bubble=True) with stdout_handler.applicationbound(): with log_handler.applicationbound(): log.info("Arguments: {}".format(args)) start = time.time() log.info("starting at {}".format( time.strftime('%l:%M%p %Z on %b %d, %Y'))) credentials = get_credentials() http = credentials.authorize(httplib2.Http()) drive_service = discovery.build('drive', 'v3', http=http) # get the files in the specified folder. files = drive_service.files() request = files.list( pageSize=args.page_size, q="'{}' in parents".format(args.source_folder_id), fields="nextPageToken, files(id, name, mimeType)") page_counter = 0 file_counter = 0 while request is not None: file_page = request.execute(http=http) page_counter += 1 page_file_counter = 0 # reset the paging file counter # determine the page at which to start processing. if page_counter >= args.start_page: log.info(u"######## Page {} ########".format(page_counter)) for this_file in file_page['files']: file_counter += 1 page_file_counter += 1 log.info( u"#== Processing {} {} file number {} on page {}. {} files processed." 
.format(this_file['mimeType'], this_file['name'], page_file_counter, page_counter, file_counter)) # if not a folder if this_file[ 'mimeType'] != 'application/vnd.google-apps.folder': # Copy the file new_file = {'title': this_file['name']} copied_file = drive_service.files().copy( fileId=this_file['id'], body=new_file).execute() # move it to it's new location drive_service.files().update( fileId=copied_file['id'], addParents=args.target_folder_id, removeParents=args.source_folder_id).execute() else: log.info(u"Skipped Folder") else: log.info(u"Skipping Page {}".format(page_counter)) # stop if we have come to the last user specified page if args.end_page and page_counter == args.end_page: log.info( u"Finished paging at page {}".format(page_counter)) break # request the next page of files request = files.list_next(request, file_page) log.info("Running time: {}".format( str(datetime.timedelta(seconds=(round(time.time() - start, 3)))))) log.info("Log written to {}:".format(log_filename))
def main(): """ Copy a folder from Source to Target """ log_filename = os.path.join( args.log_dir, 'copy-google-drive-folder-{}.log'.format(os.path.basename(time.strftime('%Y%m%d-%H%M%S'))) ) # register some logging handlers log_handler = FileHandler( log_filename, mode='w', level=args.log_level, bubble=True ) stdout_handler = StreamHandler(sys.stdout, level=args.log_level, bubble=True) with stdout_handler.applicationbound(): with log_handler.applicationbound(): log.info("Arguments: {}".format(args)) start = time.time() log.info("starting at {}".format(time.strftime('%l:%M%p %Z on %b %d, %Y'))) credentials = get_credentials() http = credentials.authorize(httplib2.Http()) drive_service = discovery.build('drive', 'v3', http=http) # get the files in the specified folder. files = drive_service.files() request = files.list( pageSize=args.page_size, q="'{}' in parents".format(args.source_folder_id), fields="nextPageToken, files(id, name, mimeType)" ) page_counter = 0 file_counter = 0 while request is not None: file_page = request.execute(http=http) page_counter += 1 page_file_counter = 0 # reset the paging file counter # determine the page at which to start processing. if page_counter >= args.start_page: log.info(u"######## Page {} ########".format(page_counter)) for this_file in file_page['files']: file_counter += 1 page_file_counter += 1 log.info(u"#== Processing {} {} file number {} on page {}. 
{} files processed.".format( this_file['mimeType'], this_file['name'], page_file_counter, page_counter, file_counter )) # if not a folder if this_file['mimeType'] != 'application/vnd.google-apps.folder': # Copy the file new_file = {'title': this_file['name']} copied_file = drive_service.files().copy(fileId=this_file['id'], body=new_file).execute() # move it to it's new location drive_service.files().update( fileId=copied_file['id'], addParents=args.target_folder_id, removeParents=args.source_folder_id ).execute() else: log.info(u"Skipped Folder") else: log.info(u"Skipping Page {}".format(page_counter)) # stop if we have come to the last user specified page if args.end_page and page_counter == args.end_page: log.info(u"Finished paging at page {}".format(page_counter)) break # request the next page of files request = files.list_next(request, file_page) log.info("Running time: {}".format(str(datetime.timedelta(seconds=(round(time.time() - start, 3)))))) log.info("Log written to {}:".format(log_filename))
import sys

# Demonstrates three equivalent ways to control logbook output formatting.

# 1. pass format_string to the handler constructor
sh = StreamHandler(
    sys.stdout,
    format_string=
    "[{record.time}] {record.level_name}: {record.channel}:{record.message}")

# 2. assign format_string after construction
sh.format_string = "[{record.time}] {record.level_name}: {record.channel}:{record.message}"


# 3. install a custom formatter callable
def my_format_fun(record, handler):
    # Produces the same text as the join-based original:
    # "[<time>] <level>:<channel>: <message>"
    return f"[{record.time}] {record.level_name}:{record.channel}: {record.message}"


sh.formatter = my_format_fun


def main():
    info("test")


if __name__ == "__main__":
    with sh.applicationbound():
        main()
from logbook import warn, StreamHandler
import sys
from termcc.cc import cc

# Push a stdout handler onto logbook's global handler stack; the first
# warning below is emitted through it.
my_handler = StreamHandler(sys.stdout)
my_handler.push_application()
warn(cc(':red: :yin_yang: This is a warning :reset:'))

import os
from logbook import Processor


def inject_cwd(record):
    # Processor callback: attach the current working directory to every
    # log record's extra dict.
    record.extra['cwd'] = os.getcwd()


# Second warning is emitted with the processor bound, so its record
# carries record.extra['cwd'].
with my_handler.applicationbound():
    with Processor(inject_cwd).applicationbound():
        warn(cc(':blue: :yin_yang: This is a warning'))