Example #1
def to_filename(oid):
    # Return the original filename stored in the object's S3 metadata, or None on any failure.
    try:
        return s3_client.get_object(
            Bucket=os.environ.get('BUCKET_NAME'),
            Key=f'resumes/{oid}.pdf').get('Metadata').get('filename')
    except Exception:
        return None
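Every snippet on this page calls a module-level s3_client that each project defines elsewhere. A minimal setup sketch, assuming boto3 with credentials and region resolved from the environment or AWS config:

import boto3

# Hypothetical shared setup for these examples: a plain boto3 S3 client.
s3_client = boto3.client("s3")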
Example #2
def download_file(filename, filetype):
    # Fetch the object and return its contents base64-encoded for transport.
    the_object = s3_client.get_object(Bucket=config.BUCKET_NAME, Key=filename)
    body_bytes = the_object['Body'].read()
    encoded_bytes = base64.b64encode(body_bytes).decode('utf-8')
    return {
        'file': encoded_bytes,
        'type': filetype,
        'name': filename,
        'lastmodified': the_object['LastModified']
    }
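A quick usage sketch for the dict returned above; the object key and content type here are made-up values:

import base64

result = download_file("report.pdf", "application/pdf")  # hypothetical key and type
original_bytes = base64.b64decode(result["file"])         # recover the raw file contents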
Example #3
def get_common_package_data_for_all():
    my_timing = TimingMessages()
    try:
        # print u"trying to load in pickle"
        # my_data = decompress_pickle("data/get_common_package_data_for_all")
        # print u"found pickled, returning"
        # return (my_data, my_timing)

        s3_clientobj = s3_client.get_object(
            Bucket="unsub-cache", Key="get_common_package_data_for_all.json")
        contents_string = s3_clientobj["Body"].read().decode("utf-8")
        contents_json = json.loads(contents_string)
        return (contents_json, my_timing)

    except Exception as e:
        print("no pickle data, so computing. Error message:", e)

    my_data = {}

    my_data["journal_era_subjects"] = get_journal_era_subjects()
    my_timing.log_timing("get_journal_era_subjects")

    my_data["embargo_dict"] = get_embargo_data_from_db()
    my_timing.log_timing("get_embargo_data_from_db")

    my_data["unpaywall_downloads_dict_raw"] = get_unpaywall_downloads_from_db()
    my_timing.log_timing("get_unpaywall_downloads_from_db")

    my_data["social_networks"] = get_social_networks_data_from_db()
    my_timing.log_timing("get_social_networks_data_from_db")

    my_data["oa_recent"] = get_oa_recent_data_from_db()
    my_timing.log_timing("get_oa_recent_data_from_db")

    my_data["oa"] = get_oa_data_from_db()
    my_timing.log_timing("get_oa_data_from_db")

    # add this in later
    # my_data["oa_adjustment"] = get_oa_adjustment_data_from_db()
    # my_timing.log_timing("get_oa_adjustment_data_from_db")

    my_data["society"] = get_society_data_from_db()
    my_timing.log_timing("get_society_data_from_db")

    my_data["num_papers"] = get_num_papers_from_db()
    my_timing.log_timing("get_num_papers_from_db")

    # compressed_pickle("data/get_common_package_data_for_all", my_data)
    # my_timing.log_timing("pickling")

    my_data["_timing_common"] = my_timing.to_dict()
    print "my timing"
    print my_timing.to_dict()

    return (my_data, my_timing)
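Example #3 only reads the cached JSON and falls back to recomputing on a miss; the write side is not shown (the pickle lines are commented out). A sketch of the matching cache write, assuming the same bucket and key and a JSON-serializable my_data:

import json

s3_client.put_object(
    Bucket="unsub-cache",
    Key="get_common_package_data_for_all.json",
    Body=json.dumps(my_data),  # cache the freshly computed dict
    ContentType="application/json",
)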
Example #4
def to_info(oid):
    try:
        file = s3_client.get_object(Bucket=os.environ.get('BUCKET_NAME'),
                                    Key=f'resumes/{oid}.pdf')
        profile = current_user.data.get('profile')
        full_name = f'{profile["name"]} {profile["first_name"]}'
        r = {
            'url': get_resume_url(oid, full_name),
            'size': file.get('ContentLength'),
            'name': file.get('Metadata').get('filename'),
            'oid': str(oid)
        }
        return json.dumps(r)
    except Exception:
        return json.dumps({'empty': True})
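get_resume_url() is not shown in Example #4. One plausible implementation is a presigned GET URL, sketched here with boto3's generate_presigned_url; the one-hour expiry and the Content-Disposition filename are assumptions:

def get_resume_url(oid, full_name):
    # Hypothetical helper: presigned URL so the browser can download the PDF directly.
    return s3_client.generate_presigned_url(
        "get_object",
        Params={
            "Bucket": os.environ.get("BUCKET_NAME"),
            "Key": f"resumes/{oid}.pdf",
            "ResponseContentDisposition": f'attachment; filename="{full_name}.pdf"',
        },
        ExpiresIn=3600,
    )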
Example #5
def download_file(filename, filetype):
    # file_flag = memcache_client.get('file_'+filename)
    # if file_flag is None:
    # s3_client.download_file(Bucket=config.BUCKET_NAME, Key=filename, Filename='/www/meowbook.org/public/'+filename)
    #     memcache_client.set('file_'+filename, 1)
    #     return {'code': 200, 'text': 'file '+filename+' downloaded', 'name': filename, 'type': filetype}

    # return {'code': 200, 'text': 'file '+filename+' taken from server', 'name': filename, 'type': filetype}

    the_object = s3_client.get_object(Bucket=config.BUCKET_NAME, Key=filename)
    body_bytes = the_object['Body'].read()
    encoded_bytes = base64.b64encode(body_bytes).decode('utf-8')
    return {
        'file': encoded_bytes,
        'type': filetype,
        'name': filename,
        'lastmodified': the_object['LastModified']
    }
Example #6
def download_file(filename):
    return (s3_client.get_object(
        Bucket="2018-stasyev-denis-bucket",
        Key=filename).get("Body").read().decode("utf-8"))
Example #7
def parse_uploads():
    while True:
        try:
            command = u"""select * from jump_raw_file_upload_object where to_delete_date is not null"""
            with get_db_cursor() as cursor:
                cursor.execute(command)
                raw_file_upload_rows_to_delete = cursor.fetchall()
            for row_to_delete in raw_file_upload_rows_to_delete:
                file = row_to_delete["file"]
                package_id = row_to_delete["package_id"]
                if file == "price":
                    JournalPriceInput().delete(package_id)
                elif file == "perpetual-access":
                    PerpetualAccessInput().delete(package_id)
                else:
                    report_name = "jr1"
                    if "-" in file:
                        report_name = file.split("-")[1]
                    CounterInput().delete(package_id, report_name=report_name)
                # the delete will also delete the raw row which will take it off this queue

        except Exception as e:
            print("Error: exception1 {} during parse_uploads".format(e))
            try:
                db.session.rollback()
            except Exception:
                pass

        try:
            upload_preprocess_bucket = "unsub-file-uploads-preprocess"
            upload_finished_bucket = "unsub-file-uploads"
            preprocess_file_list = s3_client.list_objects(
                Bucket=upload_preprocess_bucket)
            for preprocess_file in preprocess_file_list.get("Contents", []):
                filename = preprocess_file["Key"]
                filename_base = filename.split(".")[0]
                try:
                    package_id, filetype = filename_base.split("_")
                except ValueError:
                    # not a valid file, skip it
                    continue

                print u"loading {} {}".format(package_id, filetype)
                size = preprocess_file["Size"]
                age_seconds = (datetime.datetime.utcnow() -
                               preprocess_file["LastModified"].replace(
                                   tzinfo=None)).total_seconds()

                s3_clientobj = s3_client.get_object(
                    Bucket="unsub-file-uploads-preprocess", Key=filename)
                contents_string = s3_clientobj["Body"].read()
                with open(filename, "wb") as temp_file:
                    temp_file.write(contents_string)

                loader = None
                if filetype.startswith("counter"):
                    loader = CounterInput()
                elif filetype.startswith("perpetual-access"):
                    loader = PerpetualAccessInput()
                elif filetype.startswith("price"):
                    loader = JournalPriceInput()

                if loader:
                    load_result = loader.load(package_id,
                                              filename,
                                              commit=True)

                    print u"moving file {}".format(filename)
                    s3_resource = boto3.resource("s3")
                    copy_source = {
                        "Bucket": upload_preprocess_bucket,
                        "Key": filename
                    }
                    s3_resource.meta.client.copy(copy_source,
                                                 upload_finished_bucket,
                                                 filename)
                    s3_resource.Object(upload_preprocess_bucket,
                                       filename).delete()
                    print "moved"

        except Exception as e:
            print("Error: exception2 {} during parse_uploads on file {}".format(
                e, filename))
            if loader and package_id and filename:
                load_result = loader.load(package_id, filename, commit=True)
                print("because of error, deleting file {}".format(filename))
                s3_resource = boto3.resource("s3")
                s3_resource.Object(upload_preprocess_bucket, filename).delete()
                print("because of error, deleted {}".format(filename))

            try:
                db.session.rollback()
            except Exception:
                pass

        sleep(2 * random.random())
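Note that s3_client.list_objects in parse_uploads() returns at most 1,000 keys per call; for a larger upload bucket the usual boto3 pattern is a paginator, sketched here:

paginator = s3_client.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=upload_preprocess_bucket):
    for preprocess_file in page.get("Contents", []):
        filename = preprocess_file["Key"]
        # ... same per-file handling as above ...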
Example #8
filenames = [
    "smu_SD_tr_j2_2020-01_2020-12.json", "smu_SD_tr_j3_2020-01_2020-12.json",
    "smu_SD_tr_j4_2020-01_2020-12.json"
]

print(filenames)
print(len(filenames))

for filename in filenames:
    input_string_list = []
    input_dict = {}

    print(filename)

    s3_clientobj = s3_client.get_object(Bucket="unsub-jisc", Key=filename)
    contents_string = s3_clientobj["Body"].read().decode("utf-8")

    contents_json = json.loads(contents_string)

    report_type = contents_json["Report_Header"]["Report_ID"]
    institution_name = contents_json["Report_Header"]["Institution_Name"]

    report_items = contents_json.get("Report_Items", [])
    print(report_type, institution_name, len(report_items))

    input_dict["package_id"] = u"package-jiscels{}".format(filename[0:3])
    input_dict["report_year"] = 2020
    input_dict["report_version"] = "5"
    if "tr_j2" in filename:
        input_dict["report_name"] = "trj2"
Example #9
def download_file(filename):
    response = s3_client.get_object(Bucket=settings.S3_BUCKET_NAME,
                                    Key=filename)
    content = response.get('Body').read().decode('utf8')
    print(content, dir(response))
    return content
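get_object raises when the key does not exist. A variant of Example #9 that treats a missing key explicitly, using the modeled NoSuchKey exception that boto3 S3 clients expose, might look like:

def download_file_or_none(filename):
    # Hypothetical variant: return None instead of raising when the key is missing.
    try:
        response = s3_client.get_object(Bucket=settings.S3_BUCKET_NAME, Key=filename)
    except s3_client.exceptions.NoSuchKey:
        return None
    return response['Body'].read().decode('utf8')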