Exemple #1
0
def get_download_url_by_export_id(export_id):
    """Return the signed download URL of an export.

    Calls ``client.get`` on ``/scrapers/exports/<export_id>/download`` and
    returns the ``'signed_url'`` entry of the response.

    Args:
        export_id: Identifier of the export; converted with ``str`` before
            it is placed in the path and in the error message.

    Returns:
        The value stored under ``'signed_url'`` in the response.

    Raises:
        ValueError: If the response carries the message ``"dbr: not found"``.
    """
    client = API.BaseClient()
    export_ref = str(export_id)

    response = client.get(f"/scrapers/exports/{export_ref}/download")

    # The not-found marker lives on the response itself, so it has to be
    # checked before 'signed_url' is extracted; indexing first would fail on
    # the missing key (or test the URL value) instead of raising ValueError.
    if 'message' in response and response['message'] == "dbr: not found":
        raise ValueError(f"Export with ID {export_ref} was not found")

    return response['signed_url']
Exemple #2
0
def get_job_history(scraper_name):
    """Fetch the stats history of a scraper's current job.

    Calls ``client.get`` on
    ``/scrapers/<scraper_name>/current_job/stats/history``.

    Args:
        scraper_name: Name of the scraper, interpolated into the path.

    Returns:
        The response of the request, unchanged.

    Raises:
        ValueError: If the response carries the message ``"dbr: not found"``.
    """
    api = API.BaseClient()
    endpoint = f"/scrapers/{scraper_name}/current_job/stats/history"
    payload = api.get(endpoint)

    not_found = 'message' in payload and payload['message'] == "dbr: not found"
    if not_found:
        raise ValueError(f"Scraper named {scraper_name} was not found")

    return payload
def get_recent_jobs(scraper_name):
    """Fetch the jobs listed under a scraper.

    Calls ``client.get`` on ``/scrapers/<scraper_name>/jobs``.

    Args:
        scraper_name: Name of the scraper, interpolated into the path.

    Returns:
        The response of the request, unchanged.

    Raises:
        ValueError: If the response carries the message ``"dbr: not found"``.
    """
    api = API.BaseClient()
    payload = api.get(f"/scrapers/{scraper_name}/jobs")

    # Only a response that has a 'message' entry can be the not-found reply.
    if 'message' in payload:
        if payload['message'] == "dbr: not found":
            raise ValueError(f"Scraper named {scraper_name} was not found")

    return payload
Exemple #4
0
def parsing_update(job_id, gid, options):
    """Send a parsing update for one page of a job.

    Builds a request body from ``options`` and passes it to ``client.put``
    on ``/jobs/<job_id>/pages/<gid>/parsing_update``.

    Args:
        job_id: Job identifier, interpolated into the path.
        gid: Page identifier, interpolated into the path.
        options: Mapping that must provide the keys ``'outputs'`` and
            ``'status'``; a missing key raises ``KeyError``.

    Returns:
        Whatever ``client.put`` returns.
    """
    api = API.BaseClient()

    payload = {
        'outputs': options['outputs'],
        'pages': [],  # always sent as an empty list
        'parsing_status': options['status'],
    }
    endpoint = f"/jobs/{job_id}/pages/{gid}/parsing_update"

    return api.put(endpoint, payload)
Exemple #5
0
def get_current_job_stats(scraper_name):
    """Fetch the current stats of a scraper's current job.

    Calls ``client.get`` on
    ``/scrapers/<scraper_name>/current_job/stats/current``.

    Args:
        scraper_name: Name of the scraper, interpolated into the path.

    Returns:
        The response of the request, unchanged.

    Raises:
        ValueError: If the response carries the message ``"dbr: not found"``.
    """
    api = API.BaseClient()
    endpoint = f"/scrapers/{scraper_name}/current_job/stats/current"
    payload = api.get(endpoint)

    not_found = 'message' in payload and payload['message'] == "dbr: not found"
    if not_found:
        raise ValueError(f"Scraper named {scraper_name} was not found or no active job is present")

    return payload
def find(scraper_name, options=None):
    """Fetch the current job of a scraper.

    Calls ``client.get`` on ``/scrapers/<scraper_name>/current_job``.

    Args:
        scraper_name: Name of the scraper, interpolated into the path.
        options: Passed as the second positional argument to ``client.get``.
            When omitted, a new empty dict is used for each call.

    Returns:
        The response of the request, unchanged.

    Raises:
        ValueError: If the response carries the message ``"dbr: not found"``.
    """
    # A fresh dict per call: a shared default dict would carry any change
    # made to it by the client over into later calls.
    if options is None:
        options = {}

    client = API.BaseClient()

    result = client.get(f"/scrapers/{scraper_name}/current_job", options)

    if 'message' in result and result['message'] == "dbr: not found":
        raise ValueError(
            f"Scraper with name {scraper_name} or current job was not found")

    return result
def get_by_id(job_id):
    """Fetch a job by its identifier.

    Calls ``client.get`` on ``/jobs/<job_id>``.

    Args:
        job_id: Job identifier; converted with ``str`` before it is placed
            in the path and in the error message.

    Returns:
        The response of the request, unchanged.

    Raises:
        ValueError: If the response carries the message ``"dbr: not found"``.
    """
    api = API.BaseClient()
    job_ref = str(job_id)

    payload = api.get(f"/jobs/{job_ref}")

    # Only a response that has a 'message' entry can be the not-found reply.
    if 'message' in payload:
        if payload['message'] == "dbr: not found":
            raise ValueError(f"Job with ID {job_ref} was not found")

    return payload
Exemple #8
0
def update(scraper_name, params = {}):
    """Update a scraper.

    Copies ``params`` into a new dict, sets its ``'name'`` entry to
    ``scraper_name`` and passes it as ``params=`` to ``client.put`` on
    ``/scrapers/<scraper_name>``.

    Args:
        scraper_name: Name of the scraper; used in the path and as the
            ``'name'`` entry of the request parameters.
        params: Additional request parameters. Only read here, never
            modified; a ``'name'`` entry in it is overridden.

    Returns:
        Whatever ``client.put`` returns.
    """
    api = API.BaseClient()

    # Work on a copy so the forced 'name' entry never lands in the caller's
    # (or the default) dict.
    payload = {key: params[key] for key in params}
    payload['name'] = scraper_name

    return api.put(f"/scrapers/{scraper_name}", params=payload)
Exemple #9
0
def create(scraper_name, git_repository, params = {}):
    """Create a scraper.

    Copies ``params`` into a new dict, sets its ``'name'`` and
    ``'git_repository'`` entries and passes it as ``params=`` to
    ``client.post`` on ``/scrapers``.

    Args:
        scraper_name: Value sent as the ``'name'`` entry.
        git_repository: Value sent as the ``'git_repository'`` entry.
        params: Additional request parameters. Only read here, never
            modified; ``'name'`` and ``'git_repository'`` entries in it are
            overridden.

    Returns:
        Whatever ``client.post`` returns.
    """
    api = API.BaseClient()

    # Work on a copy so the forced entries never land in the caller's
    # (or the default) dict.
    payload = {key: params[key] for key in params}
    payload['name'] = scraper_name
    payload['git_repository'] = git_repository

    return api.post("/scrapers", params=payload)
Exemple #10
0
def find(job_id, gid, params=None):
    """Fetch one page of a job.

    Calls ``client.get`` on ``/jobs/<job_id>/pages/<gid>``.

    Args:
        job_id: Job identifier, interpolated into the path.
        gid: Page identifier, interpolated into the path.
        params: Passed as the second positional argument to ``client.get``.
            When omitted, a new empty dict is used for each call.

    Returns:
        Whatever ``client.get`` returns.
    """
    # A fresh dict per call: a shared default dict would carry any change
    # made to it by the client over into later calls.
    if params is None:
        params = {}

    client = API.BaseClient()

    return client.get(f"/jobs/{job_id}/pages/{gid}", params)
Exemple #11
0
def all(job_id, params):
    """Fetch the pages of a job.

    Calls ``client.get`` on ``/jobs/<job_id>/pages``.

    Args:
        job_id: Job identifier, interpolated into the path.
        params: Passed as the second positional argument to ``client.get``.

    Returns:
        Whatever ``client.get`` returns.
    """
    api = API.BaseClient()
    endpoint = f"/jobs/{job_id}/pages"

    return api.get(endpoint, params)
Exemple #12
0
def all(scraper_name, params):
    """Fetch the pages of a scraper's current job.

    Calls ``client.get`` on ``/scrapers/<scraper_name>/current_job/pages``.

    Args:
        scraper_name: Name of the scraper, interpolated into the path.
        params: Passed to ``client.get`` as the ``params`` keyword argument.

    Returns:
        Whatever ``client.get`` returns.
    """
    api = API.BaseClient()
    endpoint = f"/scrapers/{scraper_name}/current_job/pages"

    return api.get(endpoint, params=params)
Exemple #13
0
def find_content(scraper_name, gid):
    """Fetch the content of one page of a scraper's current job.

    Calls ``client.get`` on
    ``/scrapers/<scraper_name>/current_job/pages/<gid>/content`` with
    ``{'get_raw': 1}`` as the second positional argument.

    Args:
        scraper_name: Name of the scraper, interpolated into the path.
        gid: Page identifier, interpolated into the path.

    Returns:
        Whatever ``client.get`` returns.
    """
    api = API.BaseClient()
    endpoint = f"/scrapers/{scraper_name}/current_job/pages/{gid}/content"
    query = {'get_raw': 1}

    return api.get(endpoint, query)
Exemple #14
0
def find_failed_content(gid):
    """Fetch the failed content of a global page.

    Calls ``client.get`` on ``/global_pages/<gid>/failed_content``.

    Args:
        gid: Page identifier, interpolated into the path.

    Returns:
        Whatever ``client.get`` returns.
    """
    api = API.BaseClient()
    endpoint = f"/global_pages/{gid}/failed_content"

    return api.get(endpoint)
Exemple #15
0
def get_by_name(scraper_name):
    """Fetch a scraper by its name.

    Calls ``client.get`` on ``/scrapers/<scraper_name>``.

    Args:
        scraper_name: Name of the scraper, interpolated into the path.

    Returns:
        Whatever ``client.get`` returns.
    """
    api = API.BaseClient()
    endpoint = f"/scrapers/{scraper_name}"

    return api.get(endpoint)
Exemple #16
0
def delete(scraper_name):
    """Delete a scraper by its name.

    Calls ``client.delete`` on ``/scrapers/<scraper_name>``.

    Args:
        scraper_name: Name of the scraper, interpolated into the path.

    Returns:
        Whatever ``client.delete`` returns.
    """
    api = API.BaseClient()
    endpoint = f"/scrapers/{scraper_name}"

    return api.delete(endpoint)
Exemple #17
0
def all(params=None):
    """Fetch the list of scrapers.

    Calls ``client.get`` on ``/scrapers``.

    Args:
        params: Passed to ``client.get`` as the ``params`` keyword argument.
            When omitted, a new empty dict is used for each call.

    Returns:
        Whatever ``client.get`` returns.
    """
    # A fresh dict per call: a shared default dict would carry any change
    # made to it by the client over into later calls.
    if params is None:
        params = {}

    client = API.BaseClient()

    return client.get('/scrapers', params=params)
Exemple #18
0
def find(gid):
    """Fetch a global page by its identifier.

    Calls ``client.get`` on ``/global_pages/<gid>``.

    Args:
        gid: Page identifier, interpolated into the path.

    Returns:
        Whatever ``client.get`` returns.
    """
    api = API.BaseClient()
    endpoint = f"/global_pages/{gid}"

    return api.get(endpoint)