import gspread
import logging
import os
import requests
import string
import sys
from datetime import datetime
from oauth2client.service_account import ServiceAccountCredentials
from openpyxl import load_workbook
from urllib.parse import urljoin

import lattice

# follow the instructions here to enable the API & generate credentials:
# https://www.twilio.com/blog/2017/02/an-easy-way-to-read-and-write-to-a-google-spreadsheet-in-python.html
creds = ServiceAccountCredentials.from_json_keyfile_name(
    args.creds, 'https://www.googleapis.com/auth/drive')
client = gspread.authorize(creds)
sheet = client.open_by_key(args.sheet)
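# delete any existing tab with this schema's name so a fresh one can be written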
for tab in sheet.worksheets():
    if tab.title == schema_name:
        sheet.del_worksheet(tab)
tab = sheet.add_worksheet(title=schema_name, rows='100', cols='52')

abcs = string.ascii_uppercase
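# column labels A-Z then AA-AZ, 52 in total to match the worksheet created above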
cell_grid = list(abcs) + ['A' + i for i in abcs]

connection = lattice.Connection(args.mode)
server = connection.server

# grab the OntologyTerm term_name & term_id schemas to reuse in every place that linkTo OntologyTerm
ont_schema_url = urljoin(server, 'profiles/ontology_term/?format=json')
ont_schema = requests.get(ont_schema_url).json()
term_id_props = ont_schema['properties']['term_id']
term_name_props = ont_schema['properties']['term_name']

# grab all of the submittable properties
schema_url = urljoin(server, 'profiles/' + schema_name + '/?format=json')
schema = requests.get(schema_url).json()
props = dict(schema['properties'])


def main():
    summary_report = []
    args = getArgs()
    if not args.mode:
        sys.exit('ERROR: --mode is required')
    connection = lattice.Connection(args.mode)
    server = connection.server
    print('Running on {server}'.format(server=server))
    if not os.path.isfile(args.infile):
        sys.exit(
            'ERROR: file {filename} not found!'.format(filename=args.infile))
    book = load_workbook(args.infile)
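    # map sheet names, normalized to lowercase without underscores, to the actual tab names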
    names = {}
    if args.justtype:
        if args.starttype:
            sys.exit('ERROR: cannot specify both --justtype and --starttype')
        else:
            just = args.justtype.lower().replace('_', '')
            for sheet in book.sheetnames:
                if sheet.lower().replace('_', '') == just:
                    names[sheet.lower().replace('_', '')] = sheet
    else:
        for sheet in book.sheetnames:
            names[sheet.lower().replace('_', '')] = sheet
    profiles = requests.get(server + 'profiles/?format=json').json()
    # accepted object types, plus the cover sheet tab, which is not an object type
    supported_collections = [s.lower() for s in profiles.keys()]
    supported_collections.append('cover sheet')
    for n in names.keys():
        if n not in supported_collections:  # check that each sheet name corresponds to an object type
            print(
                'ERROR: Sheet name {name} not part of supported object types!'.
                format(name=n),
                file=sys.stderr)

    ont_schema_url = urljoin(server, 'profiles/ontology_term/?format=json')
    ont_term_schema = requests.get(ont_schema_url).json()['properties']
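    # keep only the OntologyTerm properties that a user may submit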
    ontology_props = []
    for p in ont_term_schema.keys():
        if not str(ont_term_schema[p].get('comment')).startswith('Do not submit') \
                and ont_term_schema[p].get('notSubmittable') is not True:
            ontology_props.append(p)

    load_order = ORDER  # pull in the order used to load test inserts on a local instance
    if args.starttype:
        st_index = load_order.index(args.starttype)
        load_order = load_order[st_index:]
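    # map each schema name to the (row number, metadata) pairs parsed from its sheet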
    all_posts = {}
    for schema_to_load in load_order:  # go in order to try and get objects posted before they are referenced by another object
        obj_type = schema_to_load.replace('_', '')
        if obj_type in names.keys():
            obj_posts = []
            row_count, rows = reader(book, names[obj_type])

            # remove all columns that do not have any values submitted
            if not args.remove:
                index_to_remove = []
                for i in range(len(rows[0])):
                    values = [row[i] for row in rows[1:]]
                    if set(values) == {''}:
                        index_to_remove.append(i)
                # delete from the right so earlier indexes stay valid
                index_to_remove.reverse()
                for index in index_to_remove:
                    for row in rows:
                        del row[index]

            headers = rows.pop(0)  # the first row holds the schema property names
            schema_url = urljoin(
                server, 'profiles/' + schema_to_load + '/?format=json')
            schema_properties = requests.get(schema_url).json()['properties']
            invalid_flag = properties_validator(headers, schema_to_load,
                                                schema_properties,
                                                ontology_props)
            if invalid_flag:
                message = '{}: invalid schema, check the headers'.format(obj_type)
                print(message)
                summary_report.append(message)
                continue
            for row in rows:
                row_count += 1
                post_json = dict(zip(headers, row))
                # convert values to the type specified in the schema, including embedded json objects
                if not args.remove:
                    post_json, post_ont = dict_patcher(post_json,
                                                       schema_properties,
                                                       ont_term_schema)
                    for k, v in post_ont.items():
                        all_posts.setdefault('ontology_term', []).append(
                            (obj_type + '.' + k, v))
                    # add attachments here
                    if post_json.get('attachment'):
                        attach = attachment(post_json['attachment'])
                        post_json['attachment'] = attach
                obj_posts.append((row_count, post_json))
            all_posts[schema_to_load] = obj_posts

    # --patchall forces a PATCH for every existing object; otherwise the user is prompted per object
    patch_req = bool(args.patchall)
    for schema in load_order:  # go in order to try and get objects posted before they are referenced by another object
        if all_posts.get(schema):
            total = 0
            error = 0
            success = 0
            patch = 0
            new_accessions_aliases = []
            failed_postings = []
            for row_count, post_json in all_posts[schema]:
                total += 1
                # check for an existing object based on any possible identifier
                temp_id, temp = check_existing_obj(post_json, schema,
                                                   connection)

                if temp.get('uuid'):  # there is an existing corresponding object
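                    # OntologyTerm objects are shared, so only fill in missing values and never overwrite an existing one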
                    if schema == 'ontology_term':
                        ont_mismatch = False
                        ont_patch = False
                        for k in post_json.keys():
                            if temp.get(k) and post_json[k] != temp.get(k):
                                print(
                                    'ERROR: {}:{} {} of {} does not match existing {}'
                                    .format(row_count, k, post_json[k],
                                            post_json['term_id'], temp.get(k)))
                                ont_mismatch = True
                            elif not temp.get(k):
                                ont_patch = True
                        if not ont_mismatch and ont_patch:
                            print(
                                schema.upper() + ' ' + str(row_count) +
                                ':Object {} already exists.  Would you like to patch it instead?'
                                .format(post_json['term_id']))
                            i = input('PATCH? y/n: ')
                            if i.lower() == 'y':
                                patch_req = True
                        elif ont_mismatch:
                            print('OntologyTerm {} will not be updated'.format(
                                post_json['term_id']))
                            i = input('EXIT SUBMISSION? y/n: ')
                            if i.lower() == 'y':
                                sys.exit(
                                    '{sheet}: {success} posted, {patch} patched, {error} errors out of {total} total'
                                    .format(sheet=schema.upper(),
                                            success=success,
                                            total=total,
                                            error=error,
                                            patch=patch))
                    elif not patch_req:  # patch wasn't specified, see if the user wants to patch
                        print(
                            schema.upper() + ' ROW ' + str(row_count) +
                            ':Object {} already exists.  Would you like to patch it instead?'
                            .format(temp_id))
                        i = input('PATCH? y/n: ')
                        if i.lower() == 'y':
                            patch_req = True
                    if patch_req and args.remove:
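                        # fetch the editable form of the object (frame="edit"), i.e. without calculated properties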
                        existing_json = lattice.get_object(temp['uuid'],
                                                           connection,
                                                           frame="edit")
                        for k in post_json.keys():
                            if k not in ['uuid', 'accession', 'alias', '@id']:
                                if k not in existing_json.keys():
                                    print(
                                        'Cannot remove {}; it may be a calculated property or was never submitted'
                                        .format(k))
                                else:
                                    existing_json.pop(k)
                                    print('Removing value:', k)
                        if args.update:
                            e = lattice.replace_object(temp['uuid'],
                                                       connection,
                                                       existing_json)
                            if e['status'] == 'error':
                                error += 1
                            elif e['status'] == 'success':
                                new_patched_object = e['@graph'][0]
                                # Print now and later
                                print(schema.upper() + ' ROW ' +
                                      str(row_count) + ':identifier: {}'.
                                      format((new_patched_object.get(
                                          'accession',
                                          new_patched_object.get('uuid')))))
                                patch += 1
                    elif patch_req and args.update:
                        e = lattice.patch_object(temp['uuid'], connection,
                                                 post_json)
                        if e['status'] == 'error':
                            error += 1
                        elif e['status'] == 'success':
                            new_patched_object = e['@graph'][0]
                            # Print now and later
                            print(schema.upper() + ' ROW ' + str(row_count) +
                                  ':identifier: {}'.format(
                                      (new_patched_object.get(
                                          'accession',
                                          new_patched_object.get('uuid')))))
                            patch += 1
                else:  # we have a new object to post
                    if args.patchall:
                        print(
                            schema.upper() + ' ROW ' + str(row_count) +
                            ':Object not found. Check identifier or consider removing --patchall to post a new object'
                        )
                        error += 1
                    elif args.update:
                        print(schema.upper() + ' ROW ' + str(row_count) +
                              ':POSTing data!')
                        e = lattice.post_object(schema, connection, post_json)
                        if e['status'] == 'error':
                            error += 1
                            failed_postings.append(
                                schema.upper() + ' ROW ' + str(row_count) +
                                ':' + str(
                                    post_json.get('aliases',
                                                  'alias not specified')))
                        elif e['status'] == 'success':
                            new_object = e['@graph'][0]
                            # Print now and later
                            print(schema.upper() + ' ROW ' + str(row_count) +
                                  ':New accession/UUID: {}'.format((
                                      new_object.get('accession',
                                                     new_object.get('uuid')))))
                            new_accessions_aliases.append(
                                ('ROW ' + str(row_count),
                                 new_object.get('accession',
                                                new_object.get('uuid')),
                                 new_object.get('aliases',
                                                new_object.get('name'))))
                            success += 1

            # Print now and later
            summary = ('{sheet}: {success} posted, {patch} patched, {error} errors out of {total} total'
                       .format(sheet=schema.upper(),
                               success=success,
                               total=total,
                               error=error,
                               patch=patch))
            print(summary)
            summary_report.append(summary)
            if new_accessions_aliases:
                print('New accessions/UUIDs and aliases:')
                for (row, accession, alias) in new_accessions_aliases:
                    if alias is None:
                        alias = 'alias not specified'
                    elif isinstance(alias, list):
                        alias = ', '.join(alias)
                    print(row, accession, alias)
            if failed_postings:
                print('Posting failed for {} object(s):'.format(
                    len(failed_postings)))
                for alias in failed_postings:
                    print(
                        ', '.join(alias) if isinstance(alias, list) else alias)
    print('-------Summary of all objects-------')
    print('\n'.join(summary_report))


def main():
    logging.basicConfig(filename='checkfiles.log', level=logging.INFO)
    logging.info('Started')

    args = getArgs()
    if (args.query or args.accessions) and not args.mode:
        sys.exit('ERROR: --mode is required with --query/--accessions')

    arg_count = len([arg for arg in [args.query, args.accessions, args.s3_file, args.ext_file] if arg])
    if arg_count != 1:
        sys.exit('ERROR: exactly one of --query, --accessions, --s3-file, '
                 '--ext-file is required, {} given'.format(arg_count))

    if args.mode:
        connection = lattice.Connection(args.mode)
    else:
        connection = ''  # no DB connection is needed for --s3-file/--ext-file runs

    initiating_run = 'STARTING Checkfiles version {}'.format(checkfiles_version)
    logging.info(initiating_run)

    timestr = datetime.now().strftime('%Y_%m_%d-%H_%M_%S')
    report_out = 'report_{}.tsv'.format(timestr)
    logging.info('Writing results to {}'.format(report_out))
    report_headers = '\t'.join([
        'identifier',
        'uri',
        'errors',
        'results',
        'json_patch',
        'Lattice patched?',
        'S3 tag patched?',
        'download_time',
        'check_time',
        'content_md5sum_time'
    ])
    with open(report_out, 'w') as out:
        out.write(report_headers + '\n')

    jobs = fetch_files(report_out, connection, args.query, args.accessions, args.s3_file, args.ext_file, args.file_format, args.include_validated)
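    # each job dict pairs a file object ('item') with accumulators for errors, results, and any patch payload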

    if jobs:
        logging.info('CHECKING {} files'.format(len(jobs)))
        for job in jobs:
            file_obj = job.get('item')
            logging.info('Starting {}'.format(file_obj.get('@id', 'File not in DB')))
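            # files with an external URI, mex directory bundles, and single S3 objects each take a different download path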
            if file_obj.get('external_uri'):
                local_file, job = download_external(job)
            elif file_obj.get('file_format') == 'mex':
                local_file, job = download_s3_directory(job)
            else:
                local_file, job = download_s3_file(job)
            if os.path.exists(local_file):
                check_file(job)
                if not args.s3_file and not args.ext_file:
                    compare_with_db(job, connection)
                    if job['results'].get('flowcell_details') and file_obj.get('derived_from'):
                        dets = job['results']['flowcell_details']
                        sorted_dets = sorted(dets, key=lambda k: (k.get('machine'), k.get('flowcell'), k.get('lane')))
                    if job['post_json'] and not job['errors'] and args.update:
                        logging.info('PATCHING {}'.format(file_obj.get('accession')))
                        patch = lattice.patch_object(file_obj.get('accession'), connection, job['post_json'])
                        job['patch_result'] = patch['status']
                        if file_obj.get('s3_uri'):
                            set_s3_tags(job)
            with open(report_out, 'a') as out:
                out.write(report(job))

        finishing_run = 'FINISHED Checkfiles at {}'.format(datetime.now())
        logging.info(finishing_run)
    else:
        logging.info('FINISHED No files to check, see report*.tsv for details')

    logging.info('Results written to {}'.format(report_out))
    logging.info('Finished')