def to_filename(oid):
    # Look up the original filename stored in the S3 object's metadata;
    # return None if the object is missing or has no metadata.
    try:
        return s3_client.get_object(
            Bucket=os.environ.get('BUCKET_NAME'),
            Key=f'resumes/{oid}.pdf').get('Metadata').get('filename')
    except Exception:
        return None
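# These S3 helpers (here and below) assume a module-level boto3 client named
# s3_client plus the usual stdlib imports. A minimal sketch of that shared setup
# (an assumption, not shown in the source):
import base64
import json
import os

import boto3

s3_client = boto3.client('s3')  # credentials come from the environment or an IAM role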
def download_file(filename, filetype):
    # Fetch the object from S3 and return its contents base64-encoded,
    # along with its type, name, and last-modified timestamp.
    the_object = s3_client.get_object(Bucket=config.BUCKET_NAME, Key=filename)
    file_bytes = the_object['Body'].read()
    encoded_bytes = base64.b64encode(file_bytes).decode('utf-8')
    return {
        'file': encoded_bytes,
        'type': filetype,
        'name': filename,
        'lastmodified': the_object['LastModified']
    }
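# Hypothetical caller-side sketch (not from the source): the 'file' field holds
# base64 text, so a consumer decodes it back to raw bytes before writing to disk.
result = download_file('report.pdf', 'application/pdf')  # example arguments
with open(result['name'], 'wb') as out:
    out.write(base64.b64decode(result['file']))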
def get_common_package_data_for_all():
    my_timing = TimingMessages()

    try:
        # print u"trying to load in pickle"
        # my_data = decompress_pickle("data/get_common_package_data_for_all")
        # print u"found pickled, returning"
        # return (my_data, my_timing)
        s3_clientobj = s3_client.get_object(
            Bucket="unsub-cache", Key="get_common_package_data_for_all.json")
        contents_string = s3_clientobj["Body"].read().decode("utf-8")
        contents_json = json.loads(contents_string)
        return (contents_json, my_timing)
    except Exception as e:
        print u"no pickle data, so computing. Error message: ", e
        pass

    my_data = {}

    my_data["journal_era_subjects"] = get_journal_era_subjects()
    my_timing.log_timing("get_journal_era_subjects")

    my_data["embargo_dict"] = get_embargo_data_from_db()
    my_timing.log_timing("get_embargo_data_from_db")

    my_data["unpaywall_downloads_dict_raw"] = get_unpaywall_downloads_from_db()
    my_timing.log_timing("get_unpaywall_downloads_from_db")

    my_data["social_networks"] = get_social_networks_data_from_db()
    my_timing.log_timing("get_social_networks_data_from_db")

    my_data["oa_recent"] = get_oa_recent_data_from_db()
    my_timing.log_timing("get_oa_recent_data_from_db")

    my_data["oa"] = get_oa_data_from_db()
    my_timing.log_timing("get_oa_data_from_db")

    # add this in later
    # my_data["oa_adjustment"] = get_oa_adjustment_data_from_db()
    # my_timing.log_timing("get_oa_adjustment_data_from_db")

    my_data["society"] = get_society_data_from_db()
    my_timing.log_timing("get_society_data_from_db")

    my_data["num_papers"] = get_num_papers_from_db()
    my_timing.log_timing("get_num_papers_from_db")

    # compressed_pickle("data/get_common_package_data_for_all", my_data)
    # my_timing.log_timing("pickling")

    my_data["_timing_common"] = my_timing.to_dict()

    print "my timing"
    print my_timing.to_dict()

    return (my_data, my_timing)
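# The function above reads a precomputed JSON blob from the "unsub-cache" bucket.
# The writer side is not shown in this source; a minimal sketch of refreshing that
# cache with put_object might look like this (the function name is an assumption):
def refresh_common_package_cache():
    # Hypothetical helper: compute the shared data (when no cache exists yet) and
    # write it back to the same object that get_common_package_data_for_all reads.
    my_data, my_timing = get_common_package_data_for_all()
    s3_client.put_object(
        Bucket="unsub-cache",
        Key="get_common_package_data_for_all.json",
        Body=json.dumps(my_data, default=str))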
def to_info(oid):
    # Build a JSON description of the stored resume (URL, size, original filename,
    # oid); return {'empty': True} if the object cannot be found.
    try:
        s3_object = s3_client.get_object(Bucket=os.environ.get('BUCKET_NAME'),
                                         Key=f'resumes/{oid}.pdf')
        profile = current_user.data.get('profile')
        full_name = f'{profile["name"]} {profile["first_name"]}'
        r = {
            'url': get_resume_url(oid, full_name),
            'size': s3_object.get('ContentLength'),
            'name': s3_object.get('Metadata').get('filename'),
            'oid': str(oid)
        }
        return json.dumps(r)
    except Exception:
        return json.dumps({'empty': True})
def download_file(filename, filetype):
    # Earlier memcache-backed approach, kept for reference:
    # file_flag = memcache_client.get('file_'+filename)
    # if file_flag is None:
    #     s3_client.download_file(Bucket=config.BUCKET_NAME, Key=filename, Filename='/www/meowbook.org/public/'+filename)
    #     memcache_client.set('file_'+filename, 1)
    #     return {'code': 200, 'text': 'file '+filename+' downloaded', 'name': filename, 'type': filetype}
    # return {'code': 200, 'text': 'file '+filename+' taken from server', 'name': filename, 'type': filetype}
    the_object = s3_client.get_object(Bucket=config.BUCKET_NAME, Key=filename)
    file_bytes = the_object['Body'].read()
    encoded_bytes = base64.b64encode(file_bytes).decode('utf-8')
    return {
        'file': encoded_bytes,
        'type': filetype,
        'name': filename,
        'lastmodified': the_object['LastModified']
    }
def download_file(filename):
    # Read the object from the fixed bucket and return its contents as a UTF-8 string.
    return (s3_client.get_object(
        Bucket="2018-stasyev-denis-bucket",
        Key=filename).get("Body").read().decode("utf-8"))
def parse_uploads():
    while True:
        try:
            command = u"""select * from jump_raw_file_upload_object where to_delete_date is not null"""
            with get_db_cursor() as cursor:
                cursor.execute(command)
                raw_file_upload_rows_to_delete = cursor.fetchall()
            for row_to_delete in raw_file_upload_rows_to_delete:
                file = row_to_delete["file"]
                package_id = row_to_delete["package_id"]
                if file == "price":
                    JournalPriceInput().delete(package_id)
                elif file == "perpetual-access":
                    PerpetualAccessInput().delete(package_id)
                else:
                    report_name = "jr1"
                    if "-" in file:
                        report_name = file.split("-")[1]
                    CounterInput().delete(package_id, report_name=report_name)
                # the delete will also delete the raw row which will take it off this queue
        except Exception as e:
            print "Error: exception1 {} during parse_uploads".format(e)
            try:
                db.session.rollback()
            except:
                pass

        try:
            upload_preprocess_bucket = "unsub-file-uploads-preprocess"
            upload_finished_bucket = "unsub-file-uploads"
            preprocess_file_list = s3_client.list_objects(
                Bucket=upload_preprocess_bucket)
            for preprocess_file in preprocess_file_list.get("Contents", []):
                filename = preprocess_file["Key"]
                filename_base = filename.split(".")[0]
                try:
                    package_id, filetype = filename_base.split("_")
                except ValueError:
                    # not a valid file, skip it
                    continue
                print u"loading {} {}".format(package_id, filetype)
                size = preprocess_file["Size"]
                age_seconds = (datetime.datetime.utcnow() -
                               preprocess_file["LastModified"].replace(
                                   tzinfo=None)).total_seconds()
                s3_clientobj = s3_client.get_object(
                    Bucket="unsub-file-uploads-preprocess", Key=filename)
                contents_string = s3_clientobj["Body"].read()
                with open(filename, "wb") as temp_file:
                    temp_file.write(contents_string)

                loader = None
                if filetype.startswith("counter"):
                    loader = CounterInput()
                elif filetype.startswith("perpetual-access"):
                    loader = PerpetualAccessInput()
                elif filetype.startswith("price"):
                    loader = JournalPriceInput()

                if loader:
                    load_result = loader.load(package_id, filename, commit=True)
                    print u"moving file {}".format(filename)
                    s3_resource = boto3.resource("s3")
                    copy_source = {
                        "Bucket": upload_preprocess_bucket,
                        "Key": filename
                    }
                    s3_resource.meta.client.copy(copy_source,
                                                 upload_finished_bucket, filename)
                    s3_resource.Object(upload_preprocess_bucket, filename).delete()
                    print "moved"
        except Exception as e:
            print u"Error: exception2 {} during parse_uploads on file {}".format(
                e, filename)
            if loader and package_id and filename:
                load_result = loader.load(package_id, filename, commit=True)
            print u"because of error, deleting file {}".format(filename)
            s3_resource = boto3.resource("s3")
            s3_resource.Object(upload_preprocess_bucket, filename).delete()
            print u"because of error, deleted {}".format(filename)
            try:
                db.session.rollback()
            except:
                pass

        sleep(2 * random.random())
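# s3_client.list_objects returns at most 1000 keys per call, so a very full
# preprocess bucket could be only partially drained each pass. A hedged sketch
# (not from the source) of listing with a paginator instead:
paginator = s3_client.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket="unsub-file-uploads-preprocess"):
    for entry in page.get("Contents", []):
        print entry["Key"]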
filenames = [
    "smu_SD_tr_j2_2020-01_2020-12.json",
    "smu_SD_tr_j3_2020-01_2020-12.json",
    "smu_SD_tr_j4_2020-01_2020-12.json"
]
print filenames
print len(filenames)

for filename in filenames:
    input_string_list = []
    input_dict = {}
    print filename

    s3_clientobj = s3_client.get_object(Bucket="unsub-jisc", Key=filename)
    contents_string = s3_clientobj["Body"].read().decode("utf-8")
    contents_json = json.loads(contents_string)

    report_type = contents_json["Report_Header"]["Report_ID"]
    institution_name = contents_json["Report_Header"]["Institution_Name"]
    report_items = contents_json.get("Report_Items", [])
    print report_type, institution_name, len(report_items)

    input_dict["package_id"] = u"package-jiscels{}".format(filename[0:3])
    input_dict["report_year"] = 2020
    input_dict["report_version"] = "5"
    if "tr_j2" in filename:
        input_dict["report_name"] = "trj2"
def download_file(filename):
    # Fetch the object and return its contents as a UTF-8 string.
    response = s3_client.get_object(Bucket=settings.S3_BUCKET_NAME, Key=filename)
    content = response.get('Body').read().decode('utf8')
    print(content, dir(response))  # debug output
    return content
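# None of the download helpers above guard against a missing key; get_object raises
# NoSuchKey in that case. A hedged wrapper sketch (the name download_file_or_none is
# an assumption, not from the source):
def download_file_or_none(filename):
    try:
        return download_file(filename)
    except s3_client.exceptions.NoSuchKey:
        return None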