Code Example #1
File: api.py  Project: elessarelfstone/tcomapi
def load2(url, struct, updates_date=None):
    def check_modified(m_date, c_date):
        m_date_format = '%Y-%m-%d %H:%M:%S'
        if datetime.strptime(m_date, m_date_format).date() > c_date:
            return True
        return False

    r = get(url, headers=headers, timeout=5)
    data = []

    # transform to a Python object (dict or list)
    raw = json.loads(r)

    if isinstance(raw, dict):
        box_obj = Box(raw)
        if hasattr(box_obj, 'error'):
            # raise an error if the response contains
            # an error dict instead of data
            raise ElkRequestError(box_obj.error)

    # parse only records modified after the given date
    if updates_date:
        for d in raw:
            m_date = Box(d, default_box=True).modified
            if m_date and check_modified(m_date, updates_date):
                data.append(dict_to_csvrow(d, struct))
        return data

    data = [dict_to_csvrow(d, struct) for d in raw]

    return data
Code Example #2
    def run(self):
        client = self.get_client()
        query = gql(self.query)
        start_from = None
        params = {
            'from': str(self.start_date),
            'to': str(self.end_date),
            'limit': self.limit
        }

        while True:
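            # 'after' carries the id of the last Contract from the previous page,
            # so each request picks up where the previous one stopped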
            p = params
            if start_from:
                p["after"] = start_from

            data = client.execute(query, variable_values=p)
            contracts = data.get('Contract')
            if not contracts:
                break

            start_from = contracts[-1]['id']
            data = [dict_to_csvrow(d, self.struct) for d in contracts]
            save_csvrows(self.output().path, data, sep=self.sep, quoter="\"")
Code Example #3
    def run(self):
        client = self.get_client()
        query = gql(self.query)
        start_from = None
        params = {
            'from': str(self.start_date),
            'to': str(self.end_date),
            'limit': self.limit
        }

        header = tuple(f.name for f in attr.fields(GoszakupCompanyRow))
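        # write the header row once, before any data rows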
        save_csvrows(self.output().path, [header], sep=self.sep)

        while True:
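            # 'after' carries the pid of the last Subject from the previous page,
            # so each request picks up where the previous one stopped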
            p = params
            if start_from:
                p["after"] = start_from

            data = client.execute(query, variable_values=p)
            subjects = data.get('Subjects')
            if not subjects:
                break

            start_from = subjects[-1]['pid']
            data = [dict_to_csvrow(d, self.struct) for d in subjects]
            save_csvrows(self.output().path, data, sep=self.sep, quoter="\"")
Code Example #4
File: api.py  Project: elessarelfstone/tcomapi
    async def process_id(self, session, idx, semaphore):

        row = ()
        try:
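            # respect both the rate limiter and the concurrency semaphore
            # before loading the record for this id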
            async with self.ratelimit:
                async with semaphore:
                    d = await self._load(session, idx)
                    row = ';'.join(dict_to_csvrow(d, JuridicalInfo))
        except NotSuccessError as e:
            self.stat['nse'] += 1
            append_file(self.fm.parsed_file, idx)
        else:
            append_file(self.fm.curr_file, row)
            append_file(self.fm.parsed_file, idx)

        return row, idx
Code Example #5
File: api.py  Project: elessarelfstone/tcomapi
    def _load(self, bid):
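        # build the XML request body for the given BIN and the date range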
        request = self.request_template.format(bid, *self._date_range)
        url = self.url_template.format(self.host, self._token)

        r = requests.post(url, request, headers=self.headers, verify=False, timeout=self._timeout)

        status_code = r.status_code

        if status_code != 200:
            if status_code == 429:
                raise KgdTooManyRequests('Kgd limitation exceeded')
            r.raise_for_status()

        if r.text:
            try:
                d = Box(parse(r.text)).answer

            # the body could be HTML (e.g. from a Squid proxy)
            # or some other junk that is not valid XML
            except ExpatError:
                raise KgdResponseError('Not XML formatted')

        else:
            # KGD recently started sending an empty response
            # to throttle the number of requests in a given period;
            # previously it just raised a TooManyRequests error
            raise KgdResponseError('Empty response received')

        # a request error means something is wrong with our XML request;
        # all error codes are described in the KGD API docs
        if 'err' in d:
            errcode = d.err.errorcode
            raise KgdRequestError(f'Errorcode {errcode}')

        # it might be just one payment
        payments = d.payment if isinstance(d.payment, list) else [d.payment]

        # enrich each row with the BIN
        for p in payments:
            p.bin = bid

        return [dict_to_csvrow(p, self.struct) for p in payments]
Code Example #6
    def run(self):
        error_timeout = self.timeout * 3
        headers = dict()
        headers['Authorization'] = self.token

        url = f'{self.url}?limit={self.limit}'
        host = '{uri.scheme}://{uri.netloc}'.format(uri=urlparse(url))

        # parsed blocks of data are stored as URIs,
        # so on a rerun we resume from the last parsed URI
        if os.path.exists(self.parsed_fpath):
            uri = read_lines(self.parsed_fpath).pop()
            url = f'{host}{uri}'

        total = 0
        parsed_count = get_file_lines_count(self.output().path)
        parsed_count = 0 if not parsed_count else parsed_count

        while url:
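            # keep requesting pages until the API stops returning a next_page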
            try:
                r = get(url, headers=headers, timeout=self.timeout)
            except Exception:
                sleep(error_timeout)
            else:
                response = Box(json.loads(r))
                if response.next_page:
                    url = f'{self.url}?{response.next_page}'
                    append_file(self.parsed_fpath, response.next_page)
                else:
                    url = None

                total = response.total
                raw_items = list(response['items'])
                data = [dict_to_csvrow(d, self.struct) for d in raw_items]
                save_csvrows(self.output().path, data, quoter="\"")
                parsed_count += self.limit
                sleep(self.timeout)

            self.set_status_message(f'Total: {total}. Parsed: {parsed_count}')
            if total:
                self.set_progress_percentage(round((parsed_count * 100) / total))

        stat = dict(total=total, parsed=parsed_count)
        append_file(self.success_fpath, str(stat))
Code Example #7
File: api.py  Project: elessarelfstone/tcomapi
def load3(url, struct):
    def check_modified(m_date, c_date):
        m_date_format = '%Y-%m-%d %H:%M:%S'
        if datetime.strptime(m_date, m_date_format).date() > c_date:
            return True
        return False

    r = get(url, headers=headers, timeout=5)
    data = []

    # transform to a Python object (dict or list)
    raw = json.loads(r)

    if isinstance(raw, dict):
        box_obj = Box(raw)
        if hasattr(box_obj, 'error'):
            # raise an error if the response contains
            # an error dict instead of data
            raise ElkRequestError(box_obj.error)

    return [dict_to_csvrow(d, struct) for d in raw]
Code Example #8
File: api.py  Project: elessarelfstone/tcomapi
def load_data(url, struct, column_filter=None):

    data = []

    try:
        r = load_content(url, headers=headers, timeout=TIMEOUT)

        # transform to a Python object (dict or list)
        raw = json.loads(r)

        if isinstance(raw, dict):
            box_obj = Box(raw)
            if hasattr(box_obj, 'error'):
                # raise an error if the response contains
                # an error dict instead of data
                raise HTTPError(box_obj.error)

        for d in raw:
            try:
                # all values need to be strings
                _d = {k: str(v) for (k, v) in d.items()}

                if column_filter:
                    _d = apply_filter_to_dict(_d, column_filter)

                data.append(dict_to_csvrow(_d, struct))
            except BadDataType:
                pass

    except (HTTPError, ReadTimeout) as e:
        raise ExternalSourceError('Could not load {}'.format(url))

    except BadDataType:
        pass

    return data