def create_acq_dataset(ds, met, root_ds_dir=".", browse=False):
    """Create acquisition dataset. Return tuple of (dataset ID, dataset dir)."""

    # create dataset dir
    id = "acquisition-{}-esa_scihub".format(met["title"])
    root_ds_dir = os.path.abspath(root_ds_dir)
    ds_dir = os.path.join(root_ds_dir, id)
    if not os.path.isdir(ds_dir):
        os.makedirs(ds_dir, 0o755)

    # append source to met
    met['query_api'] = "opensearch"

    # set IPF version to None
    met['processing_version'] = None

    # dump dataset and met JSON
    ds_file = os.path.join(ds_dir, "%s.dataset.json" % id)
    met_file = os.path.join(ds_dir, "%s.met.json" % id)
    with open(ds_file, 'w') as f:
        json.dump(ds, f, indent=2, sort_keys=True)
    with open(met_file, 'w') as f:
        json.dump(met, f, indent=2, sort_keys=True)

    # create browse?
    if browse:
        browse_jpg = os.path.join(ds_dir, "browse.jpg")
        browse_png = os.path.join(ds_dir, "browse.png")
        browse_small_png = os.path.join(ds_dir, "browse_small.png")
        get(met['icon'], browse_jpg)
        check_call(["convert", browse_jpg, browse_png])
        os.unlink(browse_jpg)
        check_call(
            ["convert", "-resize", "250x250", browse_png, browse_small_png])

    return id, ds_dir
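# --------------------------------------------------------------------------
# Hypothetical usage sketch (not part of the original source): `example_ds`
# and `example_met` are made-up stand-ins for the dataset/metadata documents
# a caller would normally build from an ESA SciHub opensearch result.
example_ds = {
    "version": "v2.0",                                      # hypothetical
    "starttime": "2020-01-01T00:00:00",
    "endtime": "2020-01-01T00:00:25",
}
example_met = {
    "title": "S1A_IW_SLC__1SDV_20200101T000000_EXAMPLE",    # hypothetical
    "icon": "https://example.com/quicklook.jpg",            # hypothetical
}
acq_id, acq_dir = create_acq_dataset(example_ds, example_met,
                                     root_ds_dir="/tmp", browse=False)
print("wrote %s to %s" % (acq_id, acq_dir))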
def create_acq_dataset(ds, met, manifest, root_ds_dir=".", browse=False):
    """Create acquisition dataset. Return tuple of (dataset ID, dataset dir)."""

    # create dataset dir
    id = "acquisition-{}_{}_{}_{}-esa_scihub".format(
        met["platform"],
        get_timestamp_for_filename(met["sensingStart"]),
        met["trackNumber"],
        met["sensoroperationalmode"])
    root_ds_dir = os.path.abspath(root_ds_dir)
    ds_dir = os.path.join(root_ds_dir, id)
    if not os.path.isdir(ds_dir):
        os.makedirs(ds_dir, 0o755)

    # append source to met
    met['query_api'] = "opensearch"

    # append processing version (ipf)
    # ns = get_namespaces(manifest)
    # x = fromstring(manifest)
    # ipf = x.xpath('.//xmlData/safe:processing/safe:facility/safe:software/@version', namespaces=ns)[0]
    # met['processing_version'] = ipf
    met['processing_version'] = None

    # dump dataset and met JSON
    ds_file = os.path.join(ds_dir, "%s.dataset.json" % id)
    met_file = os.path.join(ds_dir, "%s.met.json" % id)
    with open(ds_file, 'w') as f:
        json.dump(ds, f, indent=2, sort_keys=True)
    with open(met_file, 'w') as f:
        json.dump(met, f, indent=2, sort_keys=True)

    # dump manifest
    manifest_file = os.path.join(ds_dir, "manifest.safe")
    with open(manifest_file, 'w') as f:
        f.write(manifest)

    # create browse?
    if browse:
        browse_jpg = os.path.join(ds_dir, "browse.jpg")
        browse_png = os.path.join(ds_dir, "browse.png")
        browse_small_png = os.path.join(ds_dir, "browse_small.png")
        get(met['icon'], browse_jpg)
        check_call(["convert", browse_jpg, browse_png])
        os.unlink(browse_jpg)
        check_call(
            ["convert", "-resize", "250x250", browse_png, browse_small_png])

    return id, ds_dir
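# --------------------------------------------------------------------------
# Sketch of the IPF (processing version) extraction that the commented-out
# lines above hint at. This is an assumption-laden illustration, not the
# original implementation: it parses the SAFE manifest with lxml and reads
# the safe:software version attribute via the same XPath as the comment.
from lxml.etree import fromstring  # assumed available

def extract_ipf_version(manifest):
    """Return the IPF version string from a manifest.safe document, or None."""
    data = manifest.encode() if isinstance(manifest, str) else manifest
    x = fromstring(data)
    ns = {k: v for k, v in x.nsmap.items() if k}  # drop default namespace
    versions = x.xpath(
        './/xmlData/safe:processing/safe:facility/safe:software/@version',
        namespaces=ns)
    return versions[0] if versions else None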
def crawl(ds_es_url, dataset_version, tag):
    """Crawl for calibration files and create datasets if they don't exist in ES."""
    active_ids = []
    for id, url in crawl_cals(dataset_version):
        #logger.info("%s: %s" % (id, url))
        active_ids.append(id)
        total, found_id = check_cal(ds_es_url, "grq", id)
        if total > 0:
            logger.info("Found %s." % id)
        else:
            logger.info("Missing %s. Creating dataset." % id)
            cal_tar_file = os.path.basename(url)
            get(url, cal_tar_file)
            safe_tar_file = cal_tar_file.replace('.TGZ', '')
            shutil.move(cal_tar_file, safe_tar_file)
            create_cal_ds(safe_tar_file, ds_es_url, dataset_version)
    purge_active_cal_ds(ds_es_url, dataset_version)
    create_active_cal_ds(active_ids, dataset_version)
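# --------------------------------------------------------------------------
# Hypothetical invocation sketch (all values are made up): sync any
# calibration files missing from the GRQ Elasticsearch index.
if __name__ == "__main__":
    crawl("http://localhost:9200",     # hypothetical GRQ ES URL
          dataset_version="v1.1",      # hypothetical dataset version
          tag="dev")                   # hypothetical tag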
# NOTE: this fragment runs inside an enclosing loop over product hits;
# doc, prod_id, work_dir, and prod_url (initialized to None) are set by the
# surrounding code, and `continue` skips to the next product.
for url in doc['urls']:
    if url.startswith('s3://'):
        prod_url = url
        break
if prod_url is None:
    print("Failed to find s3 url for prod %s" % prod_id)
    continue

# set up a clean work dir with a merged/ subdirectory
if os.path.exists(work_dir):
    shutil.rmtree(work_dir)
os.makedirs(work_dir, 0o755)
os.chdir(work_dir)
merged_dir = "merged"
if os.path.exists(merged_dir):
    shutil.rmtree(merged_dir)
os.makedirs(merged_dir, 0o755)

# download the unwrapped interferogram product and its sidecar files
unw_prod_file = "filt_topophase.unw.geo"
unw_prod_url = "%s/merged/%s" % (prod_url, unw_prod_file)
get(unw_prod_url, "merged/{}".format(unw_prod_file))
for i in ('hdr', 'vrt', 'xml'):
    get("{}.{}".format(unw_prod_url, i),
        "merged/{}.{}".format(unw_prod_file, i))
#print json.dumps(doc, indent=2)

# clean out tiles if exists
parsed_url = urlparse(prod_url)
tiles_url = "s3://{}/tiles".format(parsed_url.path[1:])
cmd = "aws s3 rm --recursive {}"
check_call(cmd.format(tiles_url), shell=True)

# create displacement tile layer
vrt_prod_file = "{}.vrt".format(unw_prod_file)
dis_layer = "displacement"
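# --------------------------------------------------------------------------
# The fragment above assumes a get(url, dest) download helper. A minimal
# stand-in (an assumption, not the project's actual implementation) could
# shell out to the aws CLI for s3:// URLs and stream HTTP downloads with
# requests otherwise.
import requests
from subprocess import check_call

def get(url, dest):
    """Download url to dest (sketch; no retries or checksumming)."""
    if url.startswith("s3://"):
        check_call(["aws", "s3", "cp", url, dest])
    else:
        r = requests.get(url, stream=True, timeout=60)
        r.raise_for_status()
        with open(dest, "wb") as f:
            for chunk in r.iter_content(chunk_size=1 << 20):
                f.write(chunk)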
def export(args):
    """Export HySDS package."""
    cont_id = args.id  # container id

    # query for container
    cont = mozart_es.get_by_id(index=CONTAINERS_INDEX, id=cont_id, ignore=404)
    if cont['found'] is False:
        logger.error("SDS package id {} not found.".format(cont_id))
        return 1
    cont_info = cont['_source']
    logger.debug("cont_info: %s" % json.dumps(cont_info, indent=2))

    # set export directory
    outdir = normpath(args.outdir)
    export_name = "{}.sdspkg".format(cont_id.replace(':', '-'))
    export_dir = os.path.join(outdir, export_name)
    logger.debug("export_dir: %s" % export_dir)

    # if directory exists, stop
    if os.path.exists(export_dir):
        logger.error(
            "SDS package export directory {} exists. Not continuing.".format(
                export_dir))
        return 1

    validate_dir(export_dir)  # create export directory

    # download container
    get(cont_info['url'], export_dir)
    cont_info['url'] = os.path.basename(cont_info['url'])

    # query job specs
    query = {
        "query": {
            "term": {
                "container.keyword": cont_id
            }
        }
    }
    job_specs = mozart_es.query(index=JOB_SPECS_INDEX, body=query)
    job_specs = [job_spec['_source'] for job_spec in job_specs]
    logger.debug("job_specs: %s" % json.dumps(job_specs, indent=2))

    # backwards-compatible query
    if len(job_specs) == 0:
        logger.debug("Got no job_specs. Checking deprecated mappings:")
        query = {
            "query": {
                "query_string": {
                    "query": "container:\"{}\"".format(cont_id)
                }
            }
        }
        job_specs = mozart_es.query(index=JOB_SPECS_INDEX, body=query)
        job_specs = [job_spec['_source'] for job_spec in job_specs]
        logger.debug("job_specs: %s" % json.dumps(job_specs, indent=2))

    # pull hysds_ios for each job_spec and download any dependency images
    hysds_ios = []
    dep_images = {}
    for job_spec in job_specs:
        # download dependency images
        for d in job_spec.get('dependency_images', []):
            if d['container_image_name'] in dep_images:
                d['container_image_url'] = dep_images[d['container_image_name']]
            else:
                # download container
                get(d['container_image_url'], export_dir)
                d['container_image_url'] = os.path.basename(
                    d['container_image_url'])
                dep_images[d['container_image_name']] = d['container_image_url']

        # collect hysds_ios from mozart
        query = {
            "query": {
                "term": {
                    "job-specification.keyword": job_spec['id']
                }
            }
        }
        mozart_hysds_ios = mozart_es.query(index=HYSDS_IOS_MOZART_INDEX, body=query)
        mozart_hysds_ios = [hysds_io['_source'] for hysds_io in mozart_hysds_ios]
        logger.debug("Found %d hysds_ios on mozart for %s." %
                     (len(mozart_hysds_ios), job_spec['id']))

        # backwards-compatible query
        if len(mozart_hysds_ios) == 0:
            logger.debug(
                "Got no hysds_ios from mozart. Checking deprecated mappings:")
            query = {
                "query": {
                    "query_string": {
                        "query": "job-specification:\"{}\"".format(job_spec['id'])
                    }
                }
            }
            mozart_hysds_ios = mozart_es.query(index=HYSDS_IOS_MOZART_INDEX, body=query)
            mozart_hysds_ios = [hysds_io['_source'] for hysds_io in mozart_hysds_ios]
            logger.debug("Found %d hysds_ios on mozart for %s." %
                         (len(mozart_hysds_ios), job_spec['id']))
        hysds_ios.extend(mozart_hysds_ios)

        # collect hysds_ios from grq
        query = {
            "query": {
                "term": {
                    "job-specification.keyword": job_spec['id']
                }
            }
        }
        grq_hysds_ios = mozart_es.query(index=HYSDS_IOS_GRQ_INDEX, body=query)
        grq_hysds_ios = [hysds_io['_source'] for hysds_io in grq_hysds_ios]
        logger.debug("Found %d hysds_ios on grq for %s." %
                     (len(grq_hysds_ios), job_spec['id']))

        # backwards-compatible query
        if len(grq_hysds_ios) == 0:
            logger.debug(
                "Got no hysds_ios from grq. Checking deprecated mappings:")
            query = {
                "query": {
                    "query_string": {
                        "query": "job-specification:\"{}\"".format(job_spec['id'])
                    }
                }
            }
            grq_hysds_ios = mozart_es.query(index=HYSDS_IOS_GRQ_INDEX, body=query)
            grq_hysds_ios = [hysds_io['_source'] for hysds_io in grq_hysds_ios]
            logger.debug("Found %d hysds_ios on grq for %s." %
                         (len(grq_hysds_ios), job_spec['id']))
        hysds_ios.extend(grq_hysds_ios)

    logger.debug("Found %d hysds_ios total." % (len(hysds_ios)))

    # drop allowed accounts unless they were requested for export
    if not args.accounts:
        for hysds_io in hysds_ios:
            if 'allowed_accounts' in hysds_io:
                del hysds_io['allowed_accounts']

    # dump manifest JSON
    manifest = {
        "containers": cont_info,
        "job_specs": job_specs,
        "hysds_ios": hysds_ios,
    }
    manifest_file = os.path.join(export_dir, 'manifest.json')
    with open(manifest_file, 'w') as f:
        json.dump(manifest, f, indent=2, sort_keys=True)

    # tar up hysds package
    tar_file = os.path.join(outdir, "{}.tar".format(export_name))
    with tarfile.open(tar_file, "w") as tar:
        tar.add(export_dir, arcname=os.path.relpath(export_dir, outdir))

    shutil.rmtree(export_dir)  # remove package dir
def export(args):
    """Export HySDS package."""

    # get user's SDS conf settings
    conf = SettingsConf()

    # container id
    cont_id = args.id

    # query for container
    mozart_es_url = "http://{}:9200".format(conf.get('MOZART_ES_PVT_IP'))
    grq_es_url = "http://{}:9200".format(conf.get('GRQ_ES_PVT_IP'))
    hits = run_query(mozart_es_url, "containers",
                     {"query": {"term": {"_id": cont_id}}})
    if len(hits) == 0:
        logger.error("SDS package id {} not found.".format(cont_id))
        return 1
    cont_info = hits[0]['_source']
    logger.debug("cont_info: {}".format(json.dumps(cont_info, indent=2)))

    # set export directory
    outdir = normpath(args.outdir)
    export_name = "{}.sdspkg".format(cont_id)
    export_dir = os.path.join(outdir, export_name)
    logger.debug("export_dir: {}".format(export_dir))

    # if directory exists, stop
    if os.path.exists(export_dir):
        logger.error(
            "SDS package export directory {} exists. Not continuing.".format(
                export_dir))
        return 1

    # create export directory
    validate_dir(export_dir)

    # download container
    get(cont_info['url'], export_dir)
    cont_info['url'] = os.path.basename(cont_info['url'])

    # query job specs
    job_specs = [
        i['_source'] for i in run_query(
            mozart_es_url, "job_specs",
            {"query": {"term": {"container.raw": cont_id}}})
    ]
    logger.debug("job_specs: {}".format(json.dumps(job_specs, indent=2)))

    # pull hysds_ios for each job_spec and download any dependency images
    hysds_ios = []
    dep_images = {}
    for job_spec in job_specs:
        # download dependency images
        for d in job_spec.get('dependency_images', []):
            if d['container_image_name'] in dep_images:
                d['container_image_url'] = dep_images[d['container_image_name']]
            else:
                # download container
                get(d['container_image_url'], export_dir)
                d['container_image_url'] = os.path.basename(
                    d['container_image_url'])
                dep_images[d['container_image_name']] = d['container_image_url']

        # collect hysds_ios from mozart
        mozart_hysds_ios = [
            i['_source'] for i in run_query(
                mozart_es_url, "hysds_ios",
                {"query": {"term": {"job-specification.raw": job_spec['id']}}})
        ]
        logger.debug("Found {} hysds_ios on mozart for {}.".format(
            len(mozart_hysds_ios), job_spec['id']))
        hysds_ios.extend(mozart_hysds_ios)

        # collect hysds_ios from grq
        grq_hysds_ios = [
            i['_source'] for i in run_query(
                grq_es_url, "hysds_ios",
                {"query": {"term": {"job-specification.raw": job_spec['id']}}})
        ]
        logger.debug("Found {} hysds_ios on grq for {}.".format(
            len(grq_hysds_ios), job_spec['id']))
        hysds_ios.extend(grq_hysds_ios)

    logger.debug("Found {} hysds_ios total.".format(len(hysds_ios)))

    # clean out allowed accounts
    for hysds_io in hysds_ios:
        if 'allowed_accounts' in hysds_io:
            del hysds_io['allowed_accounts']

    # dump manifest JSON
    manifest = {
        "containers": cont_info,
        "job_specs": job_specs,
        "hysds_ios": hysds_ios,
    }
    manifest_file = os.path.join(export_dir, 'manifest.json')
    with open(manifest_file, 'w') as f:
        json.dump(manifest, f, indent=2, sort_keys=True)

    # tar up hysds package
    tar_file = os.path.join(outdir, "{}.tar".format(export_name))
    with tarfile.open(tar_file, "w") as tar:
        tar.add(export_dir, arcname=os.path.relpath(export_dir, outdir))

    # remove package dir
    shutil.rmtree(export_dir)
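# --------------------------------------------------------------------------
# The older export() above assumes a run_query(es_url, index, query) helper
# that returns all matching hits. A minimal sketch of that contract using the
# Elasticsearch scroll API over plain HTTP (an assumption about the helper's
# behavior, not its actual implementation):
import requests

def run_query(es_url, index, query, scroll="2m", size=100):
    """Return the full list of hits for query against es_url/index."""
    url = "{}/{}/_search?scroll={}&size={}".format(es_url, index, scroll, size)
    r = requests.post(url, json=query)
    r.raise_for_status()
    resp = r.json()
    scroll_id = resp["_scroll_id"]
    hits = resp["hits"]["hits"]
    while True:
        r = requests.post("{}/_search/scroll".format(es_url),
                          json={"scroll": scroll, "scroll_id": scroll_id})
        r.raise_for_status()
        resp = r.json()
        if not resp["hits"]["hits"]:
            break
        hits.extend(resp["hits"]["hits"])
        scroll_id = resp["_scroll_id"]
    return hits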