def post(self, uri, data=None, files=None, verify=False):
        """
            Sends a POST request.

            @param uri: Uri of Service API.
            @param data: Requesting Data. Default: None

            @raise NetworkAPIClientError: Client failed to access the API.
        """
        request = None
        try:
            request = requests.post(
                self._url(uri),
                data=json.dumps(data),
                files=files,
                auth=self._auth_basic(),
                headers=self._header(),
                verify=verify
            )

            request.raise_for_status()

            return self._parse(request.text)

        except HTTPError:
            error = self._parse(request.text)
            self.logger.error(error)
            raise NetworkAPIClientError(error.get('detail', ''))
        finally:
            self.logger.info('URI: %s', uri)
            # request is None when requests.post() itself failed
            if request is not None:
                self.logger.info('Status Code: %s', request.status_code)
                self.logger.info('X-Request-Id: %s',
                                 request.headers.get('x-request-id'))
                self.logger.info('X-Request-Context: %s',
                                 request.headers.get('x-request-context'))
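
A brief usage sketch, assuming the method above belongs to a client class (called NetworkAPIClient here, a hypothetical name) that supplies the _url, _auth_basic, _header and _parse helpers it references:

# hypothetical wiring; the constructor signature is an assumption
client = NetworkAPIClient('https://networkapi.example.com')
try:
    vlan = client.post('api/v3/vlan/', data={'name': 'backbone'})
except NetworkAPIClientError as err:
    print('API call failed:', err)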
Example #2
    def perform_rest_action(self, endpoint, hdrs, params, regions):
        """ construct the POST or GET request"""
        if params:
            endpoint += '?' + urllib.parse.urlencode(params)
        else:
            endpoint += '?'
        data = None
        # check if rate limit is needed
        if self.req_count >= self.reqs_per_sec:
            delta = time.time() - self.last_req
            if delta < 1:
                time.sleep(1 - delta)
            self.last_req = time.time()
            self.req_count = 0

        # submit the POST or GET request to Ensembl REST API server
        # and try to catch errors returned by the server

        if regions:
            request = requests.post(self.server + endpoint,
                                    headers=hdrs,
                                    data=json.dumps(regions))
        else:
            request = requests.get(self.server + endpoint, headers=hdrs)

        # raise an HTTPError for 4xx/5xx responses (no-op on success)
        request.raise_for_status()

        data = request.json()
        self.req_count += 1

        return data
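
The method reads self.server, self.reqs_per_sec, self.req_count and self.last_req, none of which the listing shows; a hedged call against a real Ensembl REST endpoint might look like:

# assumes a client object initialized with server='https://rest.ensembl.org',
# reqs_per_sec=15, req_count=0 and last_req=0
genes = client.perform_rest_action(
    '/overlap/region/human/7:140424943-140624564',
    hdrs={'Content-Type': 'application/json'},
    params={'feature': 'gene'},
    regions=None)
print(len(genes), 'features returned')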
Example #3
def get_html_text(url):
    """Fetch url with a random User-Agent, retrying on timeouts."""
    while True:
        try:
            my_header = {
                "User-Agent": random.choice(my_headers.common_headers)
            }
            request = requests.get(url, headers=my_header, timeout=10)
            request.raise_for_status()
            request.encoding = request.apparent_encoding
            return request.text
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout):
            # transient network problem: loop and retry
            print('timeout. waiting retry....')
        except Exception as e:
            # any other error is not retried
            print(e)
            return None
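
A usage example (my_headers.common_headers, a list of User-Agent strings, is assumed to be importable in the snippet's module):

html = get_html_text('https://example.com/')
if html:
    print(html[:200])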
Example #4
def url_retry(url, num_retries=3):
    try:
        request = requests.get(url, timeout=60)
        # raise_for_status() raises HTTPError if the status is not 200
        request.raise_for_status()
        soup = get_soup(url)
    except requests.exceptions.HTTPError as e:
        soup = None
        write_err(e)
        with open('log.txt', 'a') as f:
            f.write(str(e) + '\n')
        if num_retries > 0:
            # on a non-200 status, retry with one fewer attempt left
            return url_retry(url, num_retries - 1)
    # a nonexistent host raises ConnectionError; do not retry in that case
    except requests.exceptions.ConnectionError:
        return None
    except requests.exceptions.Timeout:
        return None

    return soup
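
With the exception handling fixed, a call retries up to three times on HTTP errors and gives up on connection errors or timeouts (get_soup and write_err are helpers assumed by the snippet):

soup = url_retry('http://www.itwhy.org/')
if soup is not None:
    print(soup.title)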
Example #5
    def ensure(self):
        # Create parent directory
        if not self._path.parent.exists():
            logging.info("Creating directory '%s'", self._path.parent)
            # exist_ok requires Python 3.5+
            self._path.parent.mkdir(mode=0o755, parents=True, exist_ok=True)

        # If the file exists, validate it
        if self._path.exists():
            if self._md5sum is not None:
                hasher = hashlib.md5()
                with self._path.open(mode='rb') as f:
                    # hash in 1 MiB chunks; a zero-length read signals EOF
                    while True:
                        chunk = f.read(1024 * 1024)
                        if not chunk:
                            break
                        hasher.update(chunk)
                file_md5sum = hasher.hexdigest()
                if file_md5sum == self._md5sum:
                    # All is well
                    logging.info("Hash ok for '%s', not downloading",
                                 self._path)
                    return
                else:
                    # There should not be any mismatches here, strange.
                    logging.warning("Hash mismatch for '%s' (expected: %s, got: %s), removing file",
                                    self._path, self._md5sum, file_md5sum)
                    self._path.unlink()

        # Make sure that we don't use a link to a url shortener service
        self._unshorten()
        
        # Download the file
        headers = {}
        if self._md5sum is not None:
            headers['etag'] = self._md5sum
        logging.info("Downloading %s", self._url)
        request = requests.get(self._url, headers=headers)
        if request.status_code != 200:
            logging.error("Unable to download url '%s'", self._url)
            request.raise_for_status()

        # Write the file, and check the md5 checksum while doing it
        hasher = hashlib.md5()
        with self._path.open(mode='wb') as f:
            hasher.update(request.content)
            f.write(request.content)
        file_md5sum = hasher.hexdigest()

        # Check the ETag, if we got one from the server
        #etag = request.headers.get('etag')
        etag = None
        if etag is not None and etag != file_md5sum:
            self._path.unlink()
            raise DownloadException("ETag mismatch for '%s' (expected: %s, got: %s) removing file" %
                                    (self._url, etag, file_md5sum))

        # Check the md5sum, if we got one from the caller
        if not (self._md5sum is None or file_md5sum == self._md5sum):
            # The server did not serve the expected file, raise an error
            self._path.unlink()
            raise DownloadException("Hash mismatch for '%s' (expected: %s, got: %s) removing file" %
                                    (self._url, self._md5sum, file_md5sum))
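
ensure() reads _url, _path and _md5sum from its instance and calls _unshorten(); a minimal owner-class sketch (an assumption, since the listing omits the class) could be:

from pathlib import Path

class DownloadException(Exception):
    """Raised on checksum or ETag mismatches."""

class Download:  # hypothetical name for the class owning ensure()
    def __init__(self, url, path, md5sum=None):
        self._url = url
        self._path = Path(path)
        self._md5sum = md5sum

    def _unshorten(self):
        # placeholder: resolve URL-shortener redirects before downloading
        pass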
Example #6
def main():
    # Log File Setting
    time = datetime.datetime.now()
    logFileName = str(time.strftime('%d%m%y%H%M%S')) + '.log'
    logging.basicConfig(filename=logFileName,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        level=logging.INFO,
                        datefmt='%Y-%m-%d %H:%M:%S')
    logger = logging.getLogger(__name__)

    # Config.json location
    configFile = 'config.json'
    with open(configFile) as globalSettings:
        config = json.load(globalSettings)

    # Loading Global settings
    teamNum = str(config['team'])
    state = str(config['state']).lower()
    ACCESS_KEY = str(config['AWSAccess'])
    SECRET_KEY = str(config['AWSSecret'])
    link = str(config['link'])
    email = str(config['notificationEmail'])
    assignNum = 1
    # Variables
    bucketName = 'team' + str(teamNum) + state + 'assignment' + str(assignNum)

    # Connect to S3
    s3Session = boto3.Session(
        aws_access_key_id=ACCESS_KEY,
        aws_secret_access_key=SECRET_KEY,
    )
    s3 = s3Session.resource('s3')
    logger.info('S3 Connected.')

    # Create a Bucket (Name should not be uppercase)
    # Check Whether the bucket has been created
    isCreated = False
    bucket = None
    for bucket in s3.buckets.all():
        if bucket.name == bucketName:
            isCreated = True
            print('Skip: ' + 'Bucket(' + bucketName + ')' + ' Already Created.')
            logger.warning('Bucket(' + bucketName + ')' + ' Already Created.')
            break
    if not isCreated:
        bucket = s3.create_bucket(Bucket=bucketName, CreateBucketConfiguration={
            'LocationConstraint': 'us-west-2'})
        logger.info('Bucket(' + bucketName + ') Created')
        bucket.Acl().put(ACL='public-read')
        logger.info('Bucket Set to Public')

    # Access LCD
    print('###### Trying to Access LCD DataSet ######')
    try:
        request = requests.get(link)
        request.raise_for_status()
        content = request.content
    except requests.exceptions.HTTPError as e:
        print(e)
        logger.error(e)
        logger.error("No LCD DataSet Found, DataIngestion Stopped.")
        print('###### No LCD DataSet Found, DataIngestion Stopped. ######')
        return
    data = pd.read_csv(io.StringIO(content.decode('utf-8')), dtype=str, sep=',')
    logger.info('Reading DataSet from URL')
    stationId = str(data.loc[0, 'STATION'])
    station, id = stationId.split(':')
    date = time.date().strftime('%d%m%Y')
    fileName = str(config['state']) + '_' + date + '_' + station + '_' + id + '.csv'
    print('###### LCD DataSet Loading Completed ######')

    # Upload File to S3
    # Check Whether File Exists
    logger.info('Checking Whether File Exists on S3...')
    isExist = check_file(bucket, fileName)
    # for key in bucket.objects.all():
    #     if key.key == fileName:
    #         isExist = True
    #         print('Skip: ' + 'File(' + fileName + ')' + ' Already Exists.')
    #         break
    if not isExist:
        # Download to local file system
        logger.info('No Data on S3: Try Downloading DataSet from URL')
        # fileNameForToday = 'OneDay' + '_' + fileName
        # urllib.request.urlretrieve(link, fileNameForToday)
        urllib.request.urlretrieve(link, fileName)
        logger.info('Download Completed')

        # Upload Today's data to S3
        logger.info('Starting Upload Data till Today to S3')
        with open(fileName, 'rb') as body:
            s3.Object(bucketName, fileName).put(Body=body)
        s3.Object(bucketName, fileName).Acl().put(ACL='public-read')
        print('Upload: Success')
        logger.info('Data till Today Upload Succeed')
    else:
        logger.warning('File(' + fileName + ')' + ' Already Exists on S3.')

    # Program END
    logger.info('###### DataIngestion Finished ######')
    print('###### Find logs in ddmmyyHHMMSS.log ######')
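
main() is defined but never invoked; the usual entry-point guard (together with the imports the function assumes: datetime, io, json, logging, urllib.request, boto3, pandas as pd, requests, and a check_file helper defined elsewhere) completes the script:

if __name__ == '__main__':
    main()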