def main():
    """
    The main routine which kicks everything off
    :return:
    """

    # Setup the command line arguments
    flags = argparse.ArgumentParser(description="Tool to validate and fix errors in CSV files for TADC imports")
    flags.add_argument('csv_file', type=str, help="Path to a CSV file to validate")
    flags.add_argument('header_rows', type=int, help="Number of header rows")
    flags.add_argument('--fix-missing', '-f', action='store_true', help="Fix missing fields by inserting the value 'unknown'")
    flags.add_argument('--output-dir', '-o', type=str, help='Where to put output files', default=os.getcwd())
    flags.add_argument('--log-dir', '-l', type=str, help='Where to put log files', default='/tmp')
    flags.add_argument('--log-level', type=str, help='Choose a log level', default='INFO')
    flags.add_argument('--old-date-format', type=str, help="the format of dates that will be fixed", default='%d/%m/%Y')
    args = flags.parse_args()

    log_filename = os.path.join(
            args.log_dir,
            'tadc_import_validator_{}.log'.format(os.path.basename(time.strftime('%Y%m%d-%H%M%S')))
        )

    # register some logging handlers
    log_handler = FileHandler(
        log_filename,
        mode='w',
        level=args.log_level,
        bubble=True
    )
    stdout_handler = StreamHandler(sys.stdout, level=args.log_level, bubble=True)

    with stdout_handler.applicationbound():
        with log_handler.applicationbound():
            log.info("Arguments: {}".format(args))
            start = time.time()
            log.info("starting at {}".format(time.strftime('%l:%M%p %Z on %b %d, %Y')))

            with CSVFileValidator(
                    csv_file=args.csv_file,
                    header_rows=args.header_rows,
                    output_dir=args.output_dir,
                    old_date_format=args.old_date_format,
                    fix_missing=args.fix_missing) as validator:
                validator.validate_file()
                log.info("Running time: {}".format(str(datetime.timedelta(seconds=(round(time.time() - start, 3))))))
                log.info("Log written to {}:".format(log_filename))
                log.info("Fixed data is in: {}".format(validator.get_fixed_filename()))
Example #2
def main(docs, bundle, build_dir, tmp_dir, recreate, debug, w):
    '''
    annotatedocs analyzes your sphinx-based documentation and provides helpful
    feedback about the quality and possible improvements.

    The first argument should be the path to where your documentation lives
    (e.g. the one in which you usually call 'make html').

    If you leave the first argument empty it defaults to the current working
    directory.

    The build will usually be written to <your docs dir>/_build/annotatedhtml/.
    You can change the output directory with the -b option.
    '''

    # Restore default SIGPIPE behaviour so that piping works correctly.
    signal(SIGPIPE, SIG_DFL)

    if debug:
        log_level = 'DEBUG'
    else:
        log_level = 'INFO'

    null_handler = NullHandler(level='DEBUG')
    log_handler = StreamHandler(sys.stderr, level=log_level)
    with null_handler.applicationbound():
        with log_handler.applicationbound():

            loader = get_loader(docs,
                                build_dir=build_dir,
                                tmp_dir=tmp_dir)

            if recreate:
                loader.cleanup()
            loader.setup()
            confoverrides = {}
            if bundle:
                confoverrides['annotatedocs_bundle'] = bundle
            index_file = loader.build(confoverrides=confoverrides)

            if w:
                webbrowser.open(index_file)
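
# Why example #2 stacks a DEBUG-level NullHandler underneath the stderr
# handler: logbook falls back to a default handler for any record that no
# pushed handler has taken, so the NullHandler at the bottom of the stack
# silently absorbs records below the chosen level instead of letting them
# leak out through the fallback. A self-contained sketch of the pattern:
import sys

from logbook import NullHandler, StreamHandler, debug, info

with NullHandler(level='DEBUG').applicationbound():
    with StreamHandler(sys.stderr, level='INFO').applicationbound():
        info("printed to stderr")
        debug("absorbed by the NullHandler, never printed")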
Example #3
def run():
    raven_client = Client()
    logger = Logger("ardegra")
    spider_name = " ".join(sys.argv[1:])
    client = pymongo.MongoClient("mongodb://{}/ardegra".format(
        Config.DATABASE_ADDRESS))
    logger.debug("Running: {}".format(spider_name))

    try:
        db = client["ardegra"]
        document = db.spiders.find_one({"name": spider_name})
        if document is None:
            raise ValueError("No spider named '{}' is registered".format(spider_name))

        if document["type"]["name"] == "Forum Spider 1":
            spider = ForumSpider1(name=spider_name)
        elif document["type"]["name"] == "News Spider 1":
            spider = NewsSpider1(name=spider_name)
        elif document["type"]["name"] == "News Spider 2":
            spider = NewsSpider2(name=spider_name)
        else:
            raise ValueError("Unknown spider type: {}".format(document["type"]["name"]))
        spider.run()
    except Exception as err:
        raven_client.captureException()
        logger.error(str(err))
    finally:
        client.close()


if __name__ == "__main__":
    handler = StreamHandler(sys.stdout)
    with handler.applicationbound():
        run()
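
# Design note on example #3: the if/elif chain (now guarded by an explicit
# else above) grows with every new spider type. A table-driven dispatch
# (sketch, reusing the example's own spider classes) keeps the mapping in
# one place and fails fast on unknown types:
SPIDER_CLASSES = {
    "Forum Spider 1": ForumSpider1,
    "News Spider 1": NewsSpider1,
    "News Spider 2": NewsSpider2,
}


def build_spider(document, spider_name):
    type_name = document["type"]["name"]
    try:
        spider_cls = SPIDER_CLASSES[type_name]
    except KeyError:
        raise ValueError("Unknown spider type: {}".format(type_name))
    return spider_cls(name=spider_name)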
def main():
    """printing argument we got to test"""
    #print_arguments(args=args)
    file_count = 0
    page_count = 0
    upload_count = 0

    log_handler = setup_logging()

    stdout_handler = StreamHandler(sys.stdout, level=args.loglevel, bubble=True)

    with stdout_handler.applicationbound():
        with log_handler.applicationbound():
            log.info("MM Python log file test")
            log.info("Arguments: {}".format(args))
            start = time.time()
            log.info("starting at {}".format(time.strftime('%I:%M%p %Z on %b %d, %Y')),start)
            """get credentials and build authorized google drive service"""
            http = get_authorized_google_http()
            gdrive_service = discovery.build('drive', 'v3', http=http)
            #create s3 client for upload
            s3client = create_aws_client()

            # load up a match file if we have one.


            """getting files in the selected folder passed in the argument"""
            query = gdrive_service.files().list(
              pageSize=args.pagesize,q="'{}' in parents".format(args.folder_id),fields="nextPageToken, files(id, name)")


            #get all the files and read byte
            while query is not None:
                try:
                    result_gfiles = query.execute(http=http)
                    page_count += 1
                    print("\nwhile loop query, page number:" + str(page_count))

                    # printing files in retrieved list to test
                    #print_allFiles(result=result_gfiles)
                    # determine the page at which to start processing as per user input
                    if page_count >= args.startpage:
                        log.info(u"######## Page {} ########".format(page_count))
                        for this_file in result_gfiles.get('files', []):
                            file_count += 1
                            print('\nNumber of files processed so far: ' + str(file_count))
                            log.info('Current file ID and name under process: ' + this_file['id'] + ',' + this_file['name'])
                            if args.matchfile is not None:
                                log.info('Match file parameter passed by user: ' + args.matchfile)
                                # check this Drive file's name against the user-supplied pattern
                                if we_should_process_this_file(this_file['name'], args.matchfile):
                                    log.info('match found: ' + this_file['name'])
                                    # download from gdrive
                                    tempbyte = download_from_Gdrive(gdrive_service, this_file)
                                    # upload to s3
                                    val = upload_to_s3(args.bucket, args.keyprefix, tempbyte, s3client, this_file)
                                    if val:
                                        log.info('Upload complete for file: ' + this_file['name'])
                                        upload_count += 1

                                    break

                                else:
                                    log.info('File not matched, file from gdrive: ' + this_file['name'] + ', matching file name given by user (' + args.matchfile + ') not found on page ' + str(page_count))

                            else:
                                log.info('Match file parameter not passed by user, uploading all files on this page: ' + str(page_count))
                                # download from gdrive
                                tempbyte = download_from_Gdrive(gdrive_service, this_file)
                                # upload to s3
                                val = upload_to_s3(args.bucket, args.keyprefix, tempbyte, s3client, this_file)
                                if val:
                                    log.info('Upload complete for file: ' + this_file['name'])
                                    upload_count += 1


                    # stop if we have come to the last user-specified page
                    if args.endpage and page_count == args.endpage:
                        log.info(u"User defined endpage reached, finished paging at page {}".format(page_count))
                        log.info("Running time: {}".format(str(datetime.timedelta(seconds=(round(time.time() - start, 3))))))
                        log.info('Total files processed: ' + str(file_count))
                        log.info('Total files uploaded: ' + str(upload_count))
                        break
                    page_token = result_gfiles.get('nextPageToken')
                    if not page_token:
                        print('\nLast page token')
                        log.info('Reached the end of pagination, all files traversed')
                        log.info("Running time: {}".format(str(datetime.timedelta(seconds=(round(time.time() - start, 3))))))
                        log.info('Total files processed: ' + str(file_count))
                        log.info('Total files uploaded: ' + str(upload_count))
                        break
                    # request the next page of files
                    query = gdrive_service.files().list_next(query, result_gfiles)
                except Exception as e:
                    log.error('An error occurred: ' + str(e))
                    break

    print('\nprogram ended')
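
# Example #4 never defines its matching helper; a masked line above calls it
# with the Drive file name and args.matchfile. A minimal sketch of what
# we_should_process_this_file could look like -- the name and the glob-style
# matching are assumptions based on the call site and on the similarly named
# helper in the later Drive-to-S3 example:
import fnmatch


def we_should_process_this_file(filename, match_pattern):
    # Process everything when no pattern was given; otherwise do a
    # case-insensitive glob match against the Drive file name.
    if not match_pattern:
        return True
    return fnmatch.fnmatch(filename.lower(), match_pattern.lower())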
Example #5
    return args, state_dict, resume


if __name__ == '__main__':
    args, state_dict, resume = prepare()

    # redirect stdout and stderr to log file
    # redirection = open(log_name, 'a', buffering=1)
    # sys.stdout = redirection
    # sys.stderr = redirection

    stdout_handler = StreamHandler(sys.stdout, bubble=True)
    stderr_handler = StderrHandler(level=WARNING)
    # write logs to log.MODEL file
    # file_handler = FileHandler(log_name, bubble=True)
    # file_handler.format_string = '{record.message},{record.extra[cwd]}'
    # file_handler.format_string = '[{record.time:%Y-%m-%d %H:%M:%S.%f%z}] {record.level_name}: {record.message}'
    # with file_handler.applicationbound():
    stdout_handler.format_string = '[{record.time:%Y-%m-%d %H:%M:%S.%f%z}] ' \
                                   '{record.level_name}: {record.message}'
    with stderr_handler.applicationbound(), stdout_handler.applicationbound():
        if resume:
            logger.info(
                f'Resume training from checkpoint: {Loader.get_latest(args.model)[1]}'
            )

        try:
            main(args)
        except Exception as e:
            logger.error(f'\n{traceback.format_exc()}')
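
# Aside on example #5: instead of stacking one with-block per handler,
# logbook's NestedSetup pushes several handlers as a single unit. A minimal
# sketch of an equivalent setup for the two handlers above:
import sys

from logbook import WARNING, NestedSetup, StderrHandler, StreamHandler, info

setup = NestedSetup([
    StreamHandler(sys.stdout, bubble=True),
    StderrHandler(level=WARNING),
])
with setup.applicationbound():
    info("handled by every member of the nested setup")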
def main():
    """Shows basic usage of the Google Drive API.

    Creates a Google Drive API service object and outputs the names and IDs
    for up to 10 files.
    """

    log_filename = os.path.join(
        args.log_dir,
        'google-drive-to-s3-{}.log'.format(os.path.basename(time.strftime('%Y%m%d-%H%M%S')))
    )

    # register some logging handlers
    log_handler = FileHandler(
        log_filename,
        mode='w',
        level=args.log_level,
        bubble=True
    )
    stdout_handler = StreamHandler(sys.stdout, level=args.log_level, bubble=True)

    with stdout_handler.applicationbound():
        with log_handler.applicationbound():
            log.info("Arguments: {}".format(args))
            start = time.time()
            log.info("starting at {}".format(time.strftime('%l:%M%p %Z on %b %d, %Y')))

            credentials = get_credentials()
            http = credentials.authorize(httplib2.Http())
            drive_service = discovery.build('drive', 'v3', http=http)

            s3 = boto3.resource('s3')

            # load up a match file if we have one.
            if args.match_file:
                with open(args.match_file, 'r') as f:
                    match_filenames = f.read().splitlines()
            else:
                match_filenames = None

            # get the files in the specified folder.
            files = drive_service.files()
            request = files.list(
                pageSize=args.page_size,
                q="'{}' in parents".format(args.folder_id),
                fields="nextPageToken, files(id, name)"
            )

            # make sure our S3 Key prefix has a trailing slash
            key_prefix = ensure_trailing_slash(args.key_prefix)

            page_counter = 0
            file_counter = 0
            while request is not None:
                file_page = request.execute(http=http)
                page_counter += 1
                page_file_counter = 0  # reset the paging file counter

                # determine the page at which to start processing.
                if page_counter >= args.start_page:
                    log.info(u"######## Page {} ########".format(page_counter))

                    for this_file in file_page['files']:
                        file_counter += 1
                        page_file_counter += 1
                        if we_should_process_this_file(this_file['name'], match_filenames):
                            log.info(u"#== Processing {} file number {} on page {}. {} files processed.".format(
                                this_file['name'],
                                page_file_counter,
                                page_counter,
                                file_counter
                            ))

                            # download the file
                            download_request = drive_service.files().get_media(fileId=this_file['id'])
                            fh = io.BytesIO()  # Using an in memory stream location
                            downloader = MediaIoBaseDownload(fh, download_request)
                            done = False
                            pbar = InitBar(this_file['name'])
                            while done is False:
                                status, done = downloader.next_chunk()
                                pbar(int(status.progress()*100))
                                # print("\rDownload {}%".format(int(status.progress() * 100)))
                            del pbar

                            # upload to bucket
                            log.info(u"Uploading to S3")
                            s3.Bucket(args.bucket).put_object(
                                Key="{}{}".format(key_prefix, this_file['name']),
                                Body=fh.getvalue(),
                                ACL='public-read'
                            )
                            log.info(u"Uploaded to S3")
                            fh.close()  # close the file handle to release memory
                        else:
                            log.info(u"Do not need to process {}".format(this_file['name']))

                # stop if we have come to the last user specified page
                if args.end_page and page_counter == args.end_page:
                    log.info(u"Finished paging at page {}".format(page_counter))
                    break
                # request the next page of files
                request = files.list_next(request, file_page)

            log.info("Running time: {}".format(str(datetime.timedelta(seconds=(round(time.time() - start, 3))))))
            log.info("Log written to {}:".format(log_filename))
def main():
    """
    Copy a folder from Source to Target

    """

    log_filename = os.path.join(
        args.log_dir,
        'copy-google-drive-folder-{}.log'.format(os.path.basename(time.strftime('%Y%m%d-%H%M%S')))
    )

    # register some logging handlers
    log_handler = FileHandler(
        log_filename,
        mode='w',
        level=args.log_level,
        bubble=True
    )
    stdout_handler = StreamHandler(sys.stdout, level=args.log_level, bubble=True)

    with stdout_handler.applicationbound():
        with log_handler.applicationbound():
            log.info("Arguments: {}".format(args))
            start = time.time()
            log.info("starting at {}".format(time.strftime('%l:%M%p %Z on %b %d, %Y')))

            credentials = get_credentials()
            http = credentials.authorize(httplib2.Http())
            drive_service = discovery.build('drive', 'v3', http=http)

            # get the files in the specified folder.
            files = drive_service.files()
            request = files.list(
                pageSize=args.page_size,
                q="'{}' in parents".format(args.source_folder_id),
                fields="nextPageToken, files(id, name, mimeType)"
            )

            page_counter = 0
            file_counter = 0
            while request is not None:
                file_page = request.execute(http=http)
                page_counter += 1
                page_file_counter = 0  # reset the paging file counter

                # determine the page at which to start processing.
                if page_counter >= args.start_page:
                    log.info(u"######## Page {} ########".format(page_counter))

                    for this_file in file_page['files']:
                        file_counter += 1
                        page_file_counter += 1
                        log.info(u"#== Processing {} {} file number {} on page {}. {} files processed.".format(
                            this_file['mimeType'],
                            this_file['name'],
                            page_file_counter,
                            page_counter,
                            file_counter
                        ))

                        # if not a folder
                        if this_file['mimeType'] != 'application/vnd.google-apps.folder':
                            # Copy the file
                            # Drive API v3 uses 'name' for the file name ('title' was v2)
                            new_file = {'name': this_file['name']}
                            copied_file = drive_service.files().copy(fileId=this_file['id'], body=new_file).execute()
                            # move it to its new location
                            drive_service.files().update(
                                fileId=copied_file['id'],
                                addParents=args.target_folder_id,
                                removeParents=args.source_folder_id
                            ).execute()
                        else:
                            log.info(u"Skipped Folder")

                else:
                    log.info(u"Skipping Page {}".format(page_counter))

                # stop if we have come to the last user specified page
                if args.end_page and page_counter == args.end_page:
                    log.info(u"Finished paging at page {}".format(page_counter))
                    break

                # request the next page of files
                request = files.list_next(request, file_page)

            log.info("Running time: {}".format(str(datetime.timedelta(seconds=(round(time.time() - start, 3))))))
            log.info("Log written to {}:".format(log_filename))
Example #9
import sys

from logbook import StreamHandler, info

# 1. set format_string
sh = StreamHandler(
    sys.stdout,
    format_string="[{record.time}] {record.level_name}: {record.channel}:{record.message}")

# 2. set format_string
sh.format_string = "[{record.time}] {record.level_name}: {record.channel}:{record.message}"


# 3. invoke the format function
def my_format_fun(record, handler):
    return " ".join([
        "[" + str(record.time) + "]",
        record.level_name + ":" + record.channel + ":", record.message
    ])


sh.formatter = my_format_fun


def main():
    info("test")


if __name__ == "__main__":
    with sh.applicationbound():
        main()
Example #10
File: simple.py  Project: pingf/logcc
from logbook import warn, StreamHandler
import sys

from termcc.cc import cc

my_handler = StreamHandler(sys.stdout)
my_handler.push_application()
warn(cc(':red: :yin_yang: This is a warning :reset:'))

import os
from logbook import Processor


def inject_cwd(record):
    record.extra['cwd'] = os.getcwd()


with my_handler.applicationbound():
    with Processor(inject_cwd).applicationbound():
        warn(cc(':blue: :yin_yang: This is a warning'))
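
# The processor above injects record.extra['cwd'], but the handler's default
# format string never prints it. A format_string that surfaces the injected
# value (a sketch; the format hooks from example #9 work the same way):
cwd_handler = StreamHandler(
    sys.stdout,
    format_string='[{record.time}] {record.level_name}: {record.message} '
                  '(cwd={record.extra[cwd]})')

with cwd_handler.applicationbound():
    with Processor(inject_cwd).applicationbound():
        warn(cc(':yin_yang: warning with the working directory attached :reset:'))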