Esempio n. 1
0
def download_prices_from_s3(bucket: ServiceResource, dir_prices: Path,
                            remote_dir_prices: Path, missing_rics: List[str],
                            logger: logging.Logger) -> None:
    """Download the ``csv.gz`` price file of every given RIC into ``dir_prices``.

    Files that already exist locally are skipped. A failed download never
    aborts the loop: the failure is logged and the next RIC is processed.

    Args:
        bucket: S3 bucket resource the files are downloaded from.
        dir_prices: Local destination directory; created when missing.
        remote_dir_prices: Remote directory handed to ``ric2filename`` to
            build the object key of each RIC.
        missing_rics: RICs whose price files should be fetched.
        logger: Logger receiving progress and error messages.
    """
    dir_prices.mkdir(parents=True, exist_ok=True)
    not_found_code = str(HTTPStatus.NOT_FOUND.value)

    for ric in missing_rics:

        remote_filename = ric2filename(remote_dir_prices, ric, 'csv.gz')
        basename = remote_filename.name
        dest = dir_prices / basename

        if dest.is_file():
            logger.debug('skip downloading {}'.format(basename))
            continue

        logger.debug('start downloading {}'.format(basename))
        try:
            bucket.download_file(Key=str(remote_filename),
                                 Filename=str(dest))
        except ClientError as e:
            code = str(e.response.get('Error', {}).get('Code', ''))
            if code == not_found_code:
                logger.critical('{} is not found'.format(
                    str(remote_filename)))
            else:
                # Any other failure used to vanish without a trace; stay
                # best-effort but leave a record of what went wrong.
                logger.error('failed to download {} (error code: {})'.format(
                    str(remote_filename), code))
        else:
            # Only report completion when the download actually succeeded.
            logger.debug('end downloading {}'.format(basename))
Esempio n. 2
0
def download_nikkei_bodies_from_s3(bucket: ServiceResource, dest_dirname: Path,
                                   remote_filenames: List[Path],
                                   logger: logging.Logger) -> None:
    """Download each remote file into ``dest_dirname`` unless it already exists.

    Args:
        bucket: S3 bucket resource the files are downloaded from.
        dest_dirname: Local destination directory; created when missing.
        remote_filenames: Object keys to download. The local file keeps the
            last path component of the key.
        logger: Logger receiving the start/end messages.
    """
    dest_dirname.mkdir(parents=True, exist_ok=True)
    for remote_filename in remote_filenames:
        logger.info('start downloading {}'.format(remote_filename))
        dest = dest_dirname.joinpath(remote_filename.name)
        if not dest.is_file():
            bucket.download_file(Key=str(remote_filename), Filename=str(dest))
        # This message used to repeat "start downloading", which made the
        # log impossible to pair up.
        logger.info('end downloading {}'.format(remote_filename))
Esempio n. 3
0
def download_nikkei_headlines_from_s3(bucket: ServiceResource,
                                      cp932zip_dirname: str,
                                      utf8csv_dirname: str,
                                      remote_filenames: List[str],
                                      logger: logging.Logger) -> None:
    """Download zipped CP932 headline CSVs and convert each one to UTF-8.

    For every remote file the zip is downloaded into ``cp932zip_dirname``
    (unless already present), then the CSV member inside it is re-encoded
    and written to ``utf8csv_dirname/nikkei_headlines_<year>.csv`` (unless
    that file already exists). The year is taken from the ``_YYYY_`` part
    of the file name.

    Args:
        bucket: S3 bucket resource the archives are downloaded from.
        cp932zip_dirname: Local directory for the downloaded zip files.
        utf8csv_dirname: Local directory for the converted CSV files.
        remote_filenames: Object keys of the zip archives.
        logger: Logger receiving progress and error messages.

    Raises:
        ValueError: If a file name contains no ``_YYYY_`` year component.
    """
    os.makedirs(cp932zip_dirname, exist_ok=True)
    os.makedirs(utf8csv_dirname, exist_ok=True)
    not_found_code = str(HTTPStatus.NOT_FOUND.value)

    for remote_filename in remote_filenames:

        basename = os.path.basename(remote_filename).lower()
        temp_dest = os.path.join(cp932zip_dirname, basename)

        if os.path.isfile(temp_dest):
            logger.debug('skip downloading {}'.format(basename))
        else:
            logger.debug('start downloading {}'.format(basename))
            try:
                bucket.download_file(Key=remote_filename, Filename=temp_dest)
            except ClientError as e:
                code = str(e.response.get('Error', {}).get('Code', ''))
                if code == not_found_code:
                    logger.info('{} is not found'.format(remote_filename))
                else:
                    # Other failures used to be dropped silently.
                    logger.error(
                        'failed to download {} (error code: {})'.format(
                            remote_filename, code))
            else:
                logger.debug('end downloading {}'.format(basename))

        infl_filename = re.sub(r'\.zip$', '.csv',
                               basename,
                               flags=re.IGNORECASE)
        match = re.search(r'_([12][0-9]{3})_', infl_filename)
        if match is None:
            raise ValueError(
                'no year found in file name: {}'.format(basename))
        year = int(match[1])
        utf8_filename = os.path.join(utf8csv_dirname,
                                     'nikkei_headlines_{}.csv'.format(year))

        if os.path.isfile(utf8_filename):
            logger.debug('skip converting {}'.format(utf8_filename))
            continue

        if not os.path.isfile(temp_dest):
            # The download above failed; opening the archive would only
            # raise FileNotFoundError, so skip this file instead.
            logger.warning('skip converting {}: {} is missing'.format(
                utf8_filename, temp_dest))
            continue

        logger.debug('start converting {}'.format(utf8_filename))

        with zipfile.ZipFile(temp_dest, mode='r') as zf:
            with zf.open(infl_filename, mode='r') as cp932_file:
                cp932_bytes = cp932_file.read()

        # Re-encode before opening the output so a decode error cannot leave
        # an empty CSV behind that later runs would treat as already done.
        utf8_bytes = cp932_bytes.decode('cp932').encode('utf-8')
        with open(utf8_filename, mode='wb') as utf8_file:
            utf8_file.write(utf8_bytes)

        logger.debug('end converting {}'.format(utf8_filename))
Esempio n. 4
0
def download_reuters_articles_from_s3(bucket: ServiceResource,
                                      dest_dirname: Path,
                                      remote_dirnames: List[Path],
                                      logger: logging.Logger) -> None:
    """Download every object found under the given prefixes into one directory.

    Keys ending with ``/`` are ignored, and so are objects whose local copy
    (named after the last ``/``-separated component of the key) already
    exists in ``dest_dirname``.

    Args:
        bucket: S3 bucket resource that is listed and downloaded from.
        dest_dirname: Local destination directory; created when missing.
        remote_dirnames: Key prefixes whose objects are downloaded.
        logger: Logger receiving the start/end message of each prefix.
    """
    dest_dirname.mkdir(parents=True, exist_ok=True)
    local_root = Path(dest_dirname)

    for remote_dirname in remote_dirnames:
        logger.info('start downloading files in {}'.format(remote_dirname))

        for summary in bucket.objects.filter(Prefix=str(remote_dirname)):
            object_key = summary.key
            if object_key.endswith('/'):
                continue

            dest = local_root / object_key.split('/')[-1]
            if dest.is_file():
                continue

            bucket.download_file(Key=object_key, Filename=str(dest))

        logger.info('end downloading files in {}'.format(remote_dirname))
Esempio n. 5
0
def _create_topic_base(sqs: ServiceResource, topic: str) -> None:
    """Make sure the FIFO queue ``<topic>.fifo`` exists, creating it if needed.

    The call is best-effort: a ``ClientError`` from the lookup or from the
    creation is logged as a warning and not raised.

    Args:
        sqs: SQS resource used to look up and create the queue.
        topic: Topic name; ``.fifo`` is appended to form the queue name.
    """
    queue_name = '%s.fifo' % topic
    try:
        try:
            sqs.get_queue_by_name(QueueName=queue_name)
        except sqs.meta.client.exceptions.QueueDoesNotExist:
            sqs.create_queue(QueueName=queue_name,
                             Attributes={
                                 'DelaySeconds': '0',
                                 'MessageRetentionPeriod': '86400',
                                 'FifoQueue': 'true'
                             })
            logging.info('Queue %s created', topic)
    except ClientError:
        # Still non-fatal, but no longer invisible.
        logging.warning('Could not ensure queue %s exists', queue_name,
                        exc_info=True)
def update_metadata(option: dict, dynamodb_resource: ServiceResource) -> dict:
    """Update a metadata item and return the attributes reported by DynamoDB.

    Args:
        option: Keyword arguments forwarded verbatim to ``Table.update_item``.
        dynamodb_resource: DynamoDB resource used to obtain the table.

    Returns:
        The ``Attributes`` map of the response, or an empty dict when the
        response carries none (for example when ``option`` does not request
        return values).
    """
    table = dynamodb_resource.Table(get_table_name())
    resp = table.update_item(**option)
    # 'Attributes' is absent unless the request asked for return values, so
    # a plain subscript would raise KeyError for such callers.
    return resp.get('Attributes', {})
def put_make_history_item(item: dict, table_name: str,
                          dynamodb_resource: ServiceResource):
    """Put ``item`` into ``table_name`` and log the item with the response.

    Args:
        item: The item to store.
        table_name: Name of the destination table.
        dynamodb_resource: DynamoDB resource used to obtain the table.
    """
    history_table = dynamodb_resource.Table(table_name)
    response = history_table.put_item(Item=item)
    logger.info("put make history item", item=item, resp=response)
Esempio n. 8
0
def upload_image_to_s3(
    # See https://github.com/python/typeshed/issues/2706
    bucket: ServiceResource,
    file_name: str,
    content_type: Optional[str],
    user_profile: UserProfile,
    contents: bytes,
) -> None:
    """Store ``contents`` in the bucket under the key ``file_name``.

    The object is tagged with the uploader's profile id and realm id. A
    missing content type is sent as an empty string, and any content type
    not listed in ``INLINE_MIME_TYPES`` gets the ``attachment`` content
    disposition.

    Args:
        bucket: S3 bucket resource receiving the object.
        file_name: Object key.
        content_type: MIME type of the upload, or ``None`` when unknown.
        user_profile: Uploading user; supplies the object metadata.
        contents: Raw bytes to store.
    """
    s3_object = bucket.Object(file_name)

    owner_metadata = dict(
        user_profile_id=str(user_profile.id),
        realm_id=str(user_profile.realm_id),
    )

    effective_type = "" if content_type is None else content_type
    disposition = "" if effective_type in INLINE_MIME_TYPES else "attachment"

    s3_object.put(
        Body=contents,
        Metadata=owner_metadata,
        ContentType=effective_type,
        ContentDisposition=disposition,
    )
Esempio n. 9
0
def put_metadata_item(metadata: dict, dynamodb_resource: ServiceResource):
    """Save a metadata item to DynamoDB.

    Args:
        metadata: The item to store.
        dynamodb_resource: DynamoDB resource used to obtain the table named
            by ``_get_table_name()``.
    """
    target_name = _get_table_name()
    dynamodb_resource.Table(target_name).put_item(Item=metadata)
def upsert_user_make_history(user_id: str, user_name: str, amount: int,
                             table_name: str,
                             dynamodb_resource: ServiceResource) -> int:
    """Add ``amount`` to a user's counter in the current month's history item.

    The item is keyed by partition ``userMakeHistory`` and the current
    year/month in JST. The user's name and the update timestamp (epoch
    milliseconds, UTC) are written in the same update.

    Args:
        user_id: Prefix of the per-user attribute names.
        user_name: Value stored in the ``<user_id>_name`` attribute.
        amount: Number added to the ``<user_id>_times`` attribute.
        table_name: Name of the table holding the history.
        dynamodb_resource: DynamoDB resource used to obtain the table.

    Returns:
        The value of ``<user_id>_times`` after the update.
    """
    jst = timezone(offset=timedelta(hours=+9), name="jst")
    month_sort_id = datetime.now(jst).strftime("%Y年%m月")

    table = dynamodb_resource.Table(table_name)
    times_attribute = f"{user_id}_times"

    attribute_names = {
        "#user_times": times_attribute,
        "#user_name": f"{user_id}_name",
        "#updatedAt": "updatedAt",
    }
    attribute_values = {
        ":user_times": amount,
        ":user_name": user_name,
        ":updatedAt": int(datetime.now(timezone.utc).timestamp() * 1000),
    }

    response = table.update_item(
        Key={"partitionId": "userMakeHistory", "sortId": month_sort_id},
        UpdateExpression="add #user_times :user_times set #user_name = :user_name, #updatedAt = :updatedAt",
        ExpressionAttributeNames=attribute_names,
        ExpressionAttributeValues=attribute_values,
        ReturnValues="ALL_NEW",
    )
    updated_item = response["Attributes"]
    return int(updated_item[times_attribute])
def get_a_metadata(id: str,
                   dynamodb_resource: ServiceResource) -> Optional[dict]:
    """Fetch a metadata item by its ID.

    Args:
        id: Value of the ``id`` key attribute.
        dynamodb_resource: DynamoDB resource used to obtain the table.

    Returns:
        The stored item, or ``None`` when the response contains no item.
    """
    metadata_table = dynamodb_resource.Table(get_table_name())
    lookup_key = {'id': id}
    response = metadata_table.get_item(Key=lookup_key)
    return response.get('Item')
Esempio n. 12
0
def upload_prices_to_s3(bucket: ServiceResource, local_dir: Path,
                        remote_dir: Path, rics: List[str]) -> None:
    """Upload the local ``csv.gz`` price file of each RIC unless S3 has it.

    Args:
        bucket: S3 bucket resource receiving the files.
        local_dir: Local directory handed to ``ric2filename`` to locate the
            file of each RIC.
        remote_dir: Remote directory; the object key is this directory
            joined with the local file's name.
        rics: RICs whose price files should be uploaded.
    """
    for ric in rics:

        local_filename = ric2filename(local_dir, ric, 'csv.gz')
        key = str(remote_dir / local_filename.name)

        # Only the first listed object is ever compared with the key, so
        # ask for a single result instead of paging through every object
        # that shares the prefix.
        first_match = bucket.objects.filter(Prefix=key).limit(1)
        if any(obj.key == key for obj in first_match):
            continue

        with local_filename.open(mode='rb') as body:
            bucket.put_object(Key=key, Body=body)
Esempio n. 13
0
def fetch_a_metadata(id: str,
                     dynamodb_resource: ServiceResource) -> Optional[dict]:
    """Fetch a single metadata item from DynamoDB.

    Args:
        id: Value of the ``id`` key attribute.
        dynamodb_resource: DynamoDB resource used to obtain the table.

    Returns:
        The stored item, or ``None`` when no matching item is found.
    """
    # The original line read ``table = table = ...``; the duplicated target
    # was a harmless typo.
    table = dynamodb_resource.Table(get_table_name())
    resp = table.get_item(Key={'id': id})
    return resp.get('Item')
def scan_table(table_name: str,
               dynamodb_resource: ServiceResource) -> List[dict]:
    """Return every item of ``table_name`` by following scan pagination.

    Args:
        table_name: Name of the table to scan.
        dynamodb_resource: DynamoDB resource used to obtain the table.

    Returns:
        The items of all pages, in the order the pages were returned.
    """
    table = dynamodb_resource.Table(table_name)

    collected = []
    paging = {}
    while True:
        page = table.scan(**paging)
        collected.extend(page['Items'])
        if 'LastEvaluatedKey' not in page:
            return collected
        # Resume the next scan where this page stopped.
        paging = {'ExclusiveStartKey': page['LastEvaluatedKey']}
Esempio n. 15
0
def delete_diff_items(table_name: str, partition_key: str, sort_key: str,
                      diff_keys: List[str],
                      dynamodb_resource: ServiceResource) -> None:
    """Delete the items identified by ``diff_keys`` from ``table_name``.

    Each entry of ``diff_keys`` is split on ``KEY_SPLITTER``: the first part
    is the partition key value and, when ``sort_key`` is not empty, the
    second part is the sort key value.

    Args:
        table_name: Name of the table to delete from.
        partition_key: Attribute name of the partition key.
        sort_key: Attribute name of the sort key, or ``''`` when there is none.
        diff_keys: Joined key values of the items to delete.
        dynamodb_resource: DynamoDB resource used to obtain the table.
    """
    table = dynamodb_resource.Table(table_name)
    has_sort_key = sort_key != ''

    for joined_key in diff_keys:
        parts = joined_key.split(KEY_SPLITTER)
        item_key = {partition_key: parts[0]}
        if has_sort_key:
            item_key[sort_key] = parts[1]
        table.delete_item(Key=item_key)
Esempio n. 16
0
def create_instances(ec2_resource: ServiceResource,
                     image_id: str,
                     key_name: str,
                     instance_type: str,
                     num_instances: int = 1,
                     security_group_ids: Optional[List] = None,
                     user_data: Optional[Union[str, bytes]] = None,
                     block_device_map: Optional[List[Dict]] = None,
                     instance_profile_arn: Optional[str] = None,
                     placement_az: Optional[str] = None,
                     subnet_id: str = None,
                     tags: Optional[Dict[str, str]] = None) -> List[dict]:
    """
    Launch on-demand instances through the Boto3 EC2 resource API.

    Replaces create_ondemand_instances. The request is built from the
    arguments, passed through ``prune`` and handed to
    ``ec2_resource.create_instances``, whose result is returned.

    See "create_instances" (returns a list of ec2.Instance objects):
      https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
    Not to be confused with "run_instances" (same input args; returns a dictionary):
      https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.Client.run_instances

    ``user_data`` given as ``str`` is encoded to UTF-8 bytes first. When an
    instance profile ARN is given, the function waits for it to exist before
    adding it to the request. Tags, if given, are applied to the instances
    and to all volumes.
    """
    logger.info('Creating %s instance(s) ... ', instance_type)

    encoded_user_data = (user_data.encode('utf-8')
                         if isinstance(user_data, str) else user_data)

    request = dict(ImageId=image_id,
                   MinCount=num_instances,
                   MaxCount=num_instances,
                   KeyName=key_name,
                   SecurityGroupIds=security_group_ids,
                   InstanceType=instance_type,
                   UserData=encoded_user_data,
                   BlockDeviceMappings=block_device_map,
                   SubnetId=subnet_id)

    if instance_profile_arn:
        # Retrying on a "missing ARN" error would also work, but waiting for
        # the profile up front is just as easy.
        wait_until_instance_profile_arn_exists(instance_profile_arn)
        request['IamInstanceProfile'] = dict(Arn=instance_profile_arn)

    if placement_az:
        request['Placement'] = dict(AvailabilityZone=placement_az)

    if tags:
        # Tag instances and volumes at creation time.
        flat_tags = flatten_tags(tags)
        request['TagSpecifications'] = [
            {'ResourceType': resource_type, 'Tags': flat_tags}
            for resource_type in ('instance', 'volume')
        ]

    pruned_request = prune(request)
    return ec2_resource.create_instances(**pruned_request)
Esempio n. 17
0
    def delete_file_from_s3(self, path_id: str, bucket: ServiceResource) -> bool:
        """Delete the object stored under ``path_id`` from ``bucket``.

        Returns ``True`` after deleting the object. Returns ``False``, after
        logging a warning, when loading the object raises a ``ClientError``.
        """
        s3_object = bucket.Object(path_id)

        try:
            s3_object.load()
        except botocore.exceptions.ClientError:
            missing_name = path_id.rsplit("/", 1)[-1]
            logging.warning("%s does not exist. Its entry in the database will be removed.", missing_name)
            return False
        else:
            s3_object.delete()
            return True
Esempio n. 18
0
def update_sack_counter(amount: int, table_name: str, dynamodb_resource: ServiceResource) -> int:
    """Add ``amount`` to the sack counter and return the new total.

    Updates the item keyed by partition ``sackCounter`` / sort ``counter``
    and also sets its ``updatedAt`` attribute to the current time in epoch
    milliseconds (UTC).

    Args:
        amount: Number added to the ``times`` attribute.
        table_name: Name of the table holding the counter.
        dynamodb_resource: DynamoDB resource used to obtain the table.

    Returns:
        The value of ``times`` after the update, as an ``int``.
    """
    table = dynamodb_resource.Table(table_name)
    option = {
        "Key": {"partitionId": "sackCounter", "sortId": "counter"},
        "UpdateExpression": "add #times :amount set #updatedAt = :updatedAt",
        "ExpressionAttributeNames": {"#times": "times", "#updatedAt": "updatedAt"},
        "ExpressionAttributeValues": {
            ":amount": amount,
            ":updatedAt": int(datetime.now(timezone.utc).timestamp() * 1000),
        },
        "ReturnValues": "ALL_NEW",
    }
    resp = table.update_item(**option)
    # Convert explicitly to honor the declared return type, as the sibling
    # helpers do; the boto3 resource API hands numbers back as Decimal.
    return int(resp["Attributes"]["times"])
Esempio n. 19
0
def scan_metadata(dynamodb_resource: ServiceResource,
                  last_evaluated_key: Optional[dict] = None) -> List[dict]:
    """Fetch all metadata items from DynamoDB.

    A single scan may not return every item when the table is large, so the
    scan is repeated from each ``LastEvaluatedKey`` until none is returned.

    Args:
        dynamodb_resource: DynamoDB resource used to obtain the table.
        last_evaluated_key: Optional key to resume scanning from; ``None``
            scans from the beginning.

    Returns:
        The items of all scanned pages, in page order.
    """
    table = dynamodb_resource.Table(get_table_name())
    result: List[dict] = []
    start_key = last_evaluated_key

    # Iterate instead of recursing once per page: a large table could
    # otherwise exhaust the recursion limit, and the table was looked up
    # again on every page.
    while True:
        option = {}
        if start_key is not None:
            option['ExclusiveStartKey'] = start_key
        resp = table.scan(**option)
        result.extend(resp.get('Items', []))
        if 'LastEvaluatedKey' not in resp:
            return result
        start_key = resp['LastEvaluatedKey']
def get_open_sack_history(table_name: str,
                          dynamodb_resource: ServiceResource) -> List[dict]:
    """Query up to five ``sackHistory`` items in descending sort-key order.

    Only the ``sortId`` and ``times`` attributes are projected. The query
    options and the raw response are logged.

    Args:
        table_name: Name of the table holding the history.
        dynamodb_resource: DynamoDB resource used to obtain the table.

    Returns:
        The ``Items`` of the response, or an empty list when absent.
    """
    table = dynamodb_resource.Table(table_name)
    option = dict(
        KeyConditionExpression=Key("partitionId").eq("sackHistory"),
        Limit=5,
        ScanIndexForward=False,
        ProjectionExpression="#sortId, #times",
        ExpressionAttributeNames={"#sortId": "sortId", "#times": "times"},
    )
    response = table.query(**option)
    logger.info("get open sack history result", option=option, response=response)
    return response.get("Items", [])
Esempio n. 21
0
def insert_sack_history(times: int, table_name: str,
                        dynamodb_resouce: ServiceResource) -> None:
    """Insert a ``sackHistory`` item whose sort key is the current JST time.

    The put options and the response are logged.

    Args:
        times: Value stored in the item's ``times`` attribute.
        table_name: Name of the table holding the history.
        dynamodb_resouce: DynamoDB resource used to obtain the table.
    """
    table = dynamodb_resouce.Table(table_name)

    jst = timezone(offset=timedelta(hours=+9), name="jst")
    history_item = dict(
        partitionId="sackHistory",
        sortId=str(datetime.now(jst)),
        times=times,
    )
    option = {"Item": history_item}

    response = table.put_item(**option)
    logger.info("insert sack history result", option=option, resp=response)
Esempio n. 22
0
File: ec2.py Progetto: stevekm/toil
def create_instances(ec2: ServiceResource,
                     image_id: str,
                     key_name: str,
                     instance_type: str,
                     instance_profile_arn: Dict,
                     num_instances: int = 1,
                     security_group_ids: Optional[List] = None,
                     user_data: Optional[bytes] = None,
                     block_device_map: Optional[List[Dict]] = None,
                     placement: Optional[Dict] = None,
                     subnet_id: str = None):
    """
    Launch on-demand instances through the Boto3 EC2 resource API.

    Replaces create_ondemand_instances. Waits for the instance profile ARN
    to exist, then forwards every argument that has a truthy value to
    ``ec2.create_instances`` and returns its result.

    See "create_instances" (returns a list of ec2.Instance objects):
      https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
    Not to be confused with "run_instances" (same input args; returns a dictionary):
      https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.Client.run_instances
    """
    log.info('Creating %s instance(s) ... ', instance_type)
    wait_until_instance_profile_arn_exists(instance_profile_arn)

    candidate_args = dict(
        ImageId=image_id,
        MinCount=num_instances,
        MaxCount=num_instances,
        KeyName=key_name,
        SecurityGroupIds=security_group_ids,
        InstanceType=instance_type,
        UserData=user_data,
        Placement=placement,
        BlockDeviceMappings=block_device_map,
        IamInstanceProfile=instance_profile_arn,
        SubnetId=subnet_id,
    )

    # Drop arguments that were left empty.
    actual_request = {
        name: value for name, value in candidate_args.items() if value
    }

    return ec2.create_instances(**actual_request)
Esempio n. 23
0
def reset_sack_counter(table_name: str,
                       dynamodb_resource: ServiceResource) -> None:
    """Set the sack counter's ``times`` back to 0 and log the response.

    Updates the item keyed by partition ``sackCounter`` / sort ``counter``
    and sets its ``updatedAt`` attribute to the current time in epoch
    milliseconds (UTC).

    Args:
        table_name: Name of the table holding the counter.
        dynamodb_resource: DynamoDB resource used to obtain the table.
    """
    table = dynamodb_resource.Table(table_name)
    updated_at_ms = int(datetime.now(timezone.utc).timestamp() * 1000)

    response = table.update_item(
        Key={"partitionId": "sackCounter", "sortId": "counter"},
        UpdateExpression="set #times = :times, #updatedAt = :updatedAt",
        ExpressionAttributeNames={"#times": "times", "#updatedAt": "updatedAt"},
        ExpressionAttributeValues={":times": 0, ":updatedAt": updated_at_ms},
    )
    logger.info("reset sack counter result", resp=response)
def update_make_history(user_id: str, user_name: str, ts: str, table_name: str,
                        dynamodb_resource: ServiceResource) -> Tuple[int, int]:
    """Mark the ``makeHistory`` item with sort key ``ts`` as detected.

    The update is conditional on the item's ``isDetected`` attribute being
    ``False``. It sets ``isDetected`` to ``True``, stores ``user_name``
    under ``<user_id>_name`` and refreshes ``updatedAt`` (epoch
    milliseconds, UTC). The options and the response are logged on success.

    Args:
        user_id: Prefix of the attribute that stores the user name.
        user_name: Name to store.
        ts: Sort key of the history item.
        table_name: Name of the table holding the history.
        dynamodb_resource: DynamoDB resource used to obtain the table.

    Returns:
        The item's ``times`` and ``amount`` attributes as integers.

    Raises:
        NotTargetError: If the conditional check fails.
        ClientError: Any other client error, re-raised unchanged.
    """
    table = dynamodb_resource.Table(table_name)
    partition_id = "makeHistory"

    not_yet_detected = (Key("partitionId").eq(partition_id)
                        & Key("sortId").eq(ts)
                        & Attr("isDetected").eq(False))
    attribute_names = {
        "#updatedAt": "updatedAt",
        "#userName": f"{user_id}_name",
        "#isDetected": "isDetected",
    }
    attribute_values = {
        ":updatedAt": int(datetime.now(timezone.utc).timestamp() * 1000),
        ":userName": user_name,
        ":isDetected": True,
    }
    option = {
        "Key": {"partitionId": partition_id, "sortId": ts},
        "ConditionExpression": not_yet_detected,
        "UpdateExpression": "set #updatedAt = :updatedAt, #userName = :userName, #isDetected = :isDetected",
        "ExpressionAttributeNames": attribute_names,
        "ExpressionAttributeValues": attribute_values,
        "ReturnValues": "ALL_NEW",
    }

    try:
        resp = table.update_item(**option)
    except ClientError as e:
        if e.response["Error"]["Code"] != "ConditionalCheckFailedException":
            raise
        raise NotTargetError()

    logger.info("update make history result", option=option, response=resp)

    updated_item = resp["Attributes"]
    return int(updated_item["times"]), int(updated_item["amount"])
Esempio n. 25
0
def read_s3(bucket: str, key: str, s3: ServiceResource = None):
    """
    Download an S3 object and return its content decoded as UTF-8.

    :type bucket: str
    :param bucket: name of the bucket holding the object

    :type key: str
    :param key: key of the object to read
    :param s3: S3 resource; when omitted, one is created with
        ``get_s3_resource()`` and a warning is logged

    :return: str
    """
    if not s3:
        log.warning('creating a S3 resource in read_s3() function')
        s3 = get_s3_resource()
    t0 = time()
    log.info("Downloading config file {0} from s3://{1}...".format(
        key, bucket))
    obj = s3.Object(bucket, key)
    content = obj.get()['Body'].read().decode('utf-8')
    # Log the elapsed time only after the body has been read; it used to be
    # logged before the object was requested, so it measured nothing.
    log.debug('ET for reading {} from S3: {} sec'.format(
        key, round(time() - t0, 4)))
    return content
def get_user_make_history(year: int, month: int, table_name: str, dynamodb_resource: ServiceResource) -> dict:
    """Fetch the ``userMakeHistory`` item of the given year and month.

    The options and the response are logged.

    Args:
        year: Year part of the sort key.
        month: Month part of the sort key, zero-padded to two digits.
        table_name: Name of the table holding the history.
        dynamodb_resource: DynamoDB resource used to obtain the table.

    Returns:
        The ``Item`` of the response; ``None`` when the response has none.
    """
    table = dynamodb_resource.Table(table_name)
    item_key = {"partitionId": "userMakeHistory", "sortId": f"{year}年{month:02}月"}
    option = {"Key": item_key}
    response = table.get_item(**option)
    logger.info("get user make history result", option=option, response=response)
    return response.get("Item")
Esempio n. 27
0
def put_to_table(table_name: str, src_items: List[dict],
                 dynamodb_resource: ServiceResource) -> None:
    """Write every item of ``src_items`` to ``table_name`` via a batch writer.

    Args:
        table_name: Name of the destination table.
        src_items: Items to put, in order.
        dynamodb_resource: DynamoDB resource used to obtain the table.
    """
    destination = dynamodb_resource.Table(table_name)
    with destination.batch_writer() as writer:
        for entry in src_items:
            writer.put_item(Item=entry)
Esempio n. 28
0
 def __init__(self, table_name: str, dynamodb_resource: ServiceResource):
     """Store the table name and resource, call ``create()``, then bind the table.

     Args:
         table_name: Name of the DynamoDB table this object works with.
         dynamodb_resource: DynamoDB resource used to obtain the table.
     """
     self.table_name = table_name
     self.dynamodb_resource = dynamodb_resource
     # NOTE(review): create() is defined outside this view; presumably it
     # provisions the table using the two attributes set above. Confirm.
     self.create()
     # self.table is assigned only after create() returns, so create()
     # itself cannot rely on it.
     self.table = dynamodb_resource.Table(table_name)