Example #1
def bulk_load_trace_csv(f, method="upsert", skip_trace_records=False):
    requester = Requester(config.oeem.url, config.oeem.access_token)

    try:
        data = pd.read_csv(f, dtype=str).to_dict('records')
    except pd.errors.EmptyDataError:
        # An empty file means there is nothing to load, which is ok
        return True

    unique_traces = list(set(
        (d["trace_id"], d["interpretation"], d["unit"], d.get("interval"))
        for d in data
    ))

    trace_data = [
        {
            "trace_id": trace[0],
            "interpretation": trace[1],
            "unit": trace[2],
            "interval": trace[3],
            "added": datetime.utcnow().isoformat(),
            "updated": datetime.utcnow().isoformat(),
        } for trace in unique_traces
    ]

    trace_response = requester.post(
        constants.TRACE_BULK_UPSERT_VERBOSE_URL, trace_data)

    if skip_trace_records:
        return trace_response.status_code < 300

    trace_pks_by_id = {
        record["trace_id"]: record["id"]
        for record in trace_response.json()
    }

    def maybe_float(value):
        # Treat unparseable values as missing rather than failing the load.
        try:
            return float(value)
        except (TypeError, ValueError):
            return np.nan

    # Swap the CSV trace_id for the server-assigned primary key so the
    # records attach to the traces upserted above.
    trace_record_data = [
        {
            "trace_id": trace_pks_by_id[record["trace_id"]],
            "value": maybe_float(record["value"]),
            "start": record["start"],
            "estimated": record["estimated"],
        }
        for record in data
    ]

    if method == "upsert":
        trace_record_response = requester.post(
            constants.TRACE_RECORD_BULK_UPSERT_URL, trace_record_data)
    elif method == "insert":
        trace_record_response = requester.post(
            constants.TRACE_RECORD_BULK_INSERT_URL, trace_record_data)
    else:
        raise ValueError("Unknown method: {}".format(method))
    return trace_record_response.status_code == 200
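
A minimal usage sketch; the file path here is made up, and the loader assumes a CSV with columns trace_id, interpretation, unit, value, start, estimated, and (optionally) interval:

# Hypothetical invocation -- the path is an assumption.
with open("traces.csv") as f:
    ok = bulk_load_trace_csv(f, method="upsert")
print("trace load succeeded:", ok)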
Example #2
def bulk_load_project_metadata_csv(f):
    requester = Requester(config.oeem.url, config.oeem.access_token)

    input_data = read_csv_file(f)

    if len(input_data) == 0:
        print("No data to upload.")
        return True

    # auto-detect wide/tall (pivoted, unpivoted) format
    columns = input_data[0].keys()
    data = []
    if set(columns) == {'project_id', 'key', 'value'}:
        # tall format
        for row in input_data:
            key = row['key']
            value = row['value']
            if value is None or value.strip() == '':
                continue
            data.append({
                'project_id': row['project_id'],
                'key': key,
                'value': value
            })
    else:
        # wide format
        for row in input_data:
            for key, value in row.items():
                if key == 'project_id':
                    continue
                if value is None or value.strip() == '':
                    continue
                data.append({
                    'project_id': row['project_id'],
                    'key': key,
                    'value': value
                })


    n = len(data)
    batch_size = 500
    n_batches = (n + batch_size - 1) // batch_size  # ceiling division
    print(
        "Uploading {} rows of metadata in {} batches of up to {}"
        .format(n, n_batches, batch_size)
    )

    success = []
    for batch in tqdm(batches(data, batch_size)):
        response = requester.post(constants.PROJECT_METADATA_BULK_UPSERT_URL, batch)
        success.append(response.status_code == 200)
    return all(success)
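
The batches helper these loaders share is not shown in the examples; presumably it yields fixed-size chunks of a list. A minimal sketch under that assumption:

# Assumed shape of the batches() helper used above -- a sketch, not the
# project's actual implementation.
def batches(items, batch_size):
    for start in range(0, len(items), batch_size):
        yield items[start:start + batch_size]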
Example #3
def bulk_load_trace_blacklist(f):
    requester = Requester(config.oeem.url, config.oeem.access_token)

    input_data = read_csv_file(f)

    successes = []
    for batch in tqdm(batches(input_data, 500)):
        response = requester.post(
            constants.TRACE_BLACKLIST_UPSERT_VERBOSE_URL, batch)

        successes.append(response.status_code == 201)

    return all(successes)
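
read_csv_file is likewise not shown. Given that Examples #2 and #3 index its rows by column name, it presumably wraps csv.DictReader; a sketch under that assumption:

import csv

# Assumed shape of read_csv_file(): one dict per row, keyed by header.
def read_csv_file(f):
    return list(csv.DictReader(f))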
Example #4
def bulk_load_project_csv(f):
    requester = Requester(config.oeem.url, config.oeem.access_token)
    data = pd.read_csv(f, dtype=str).to_dict('records')

    for record in data:
        # have to patch in project owner field from config
        record['project_owner_id'] = config.oeem.project_owner

        # have to patch in fields that are normally autopopulated
        record['added'] = datetime.utcnow().isoformat()
        record['updated'] = datetime.utcnow().isoformat()

    # only support upsert for now
    response = requester.post(constants.PROJECT_BULK_UPSERT_URL, data)
    return response.status_code == 200
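
Every loader pulls its connection details from config.oeem, and this one additionally needs project_owner. A hedged sketch of the fields involved; how the real project populates them (file, environment, CLI) is not shown in these examples:

# Hypothetical config values -- the field names come from the code
# above; the values and the way they are set are assumptions.
config.oeem.url = "https://datastore.example.com"
config.oeem.access_token = "..."  # API token for the datastore
config.oeem.project_owner = 1     # primary key of the owning user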
Example #5
def bulk_load_project_trace_mapping_csv(f):
    requester = Requester(config.oeem.url, config.oeem.access_token)

    trace_ids = {d["trace_id"]: d["id"] for d in loaded_trace_ids()}
    project_ids = {d["project_id"]: d["id"] for d in loaded_project_ids()}

    raw_matches = pd.read_csv(f, dtype=str).to_dict('records')

    data = []
    for match in raw_matches:
        trace_id = trace_ids.get(match["trace_id"], None)
        project_id = project_ids.get(match["project_id"], None)
        if trace_id is not None and project_id is not None:
            data.append({
                "trace_id": trace_id,
                "project_id": project_id
            })

    successes = []
    for batch in tqdm(batches(data, 800)):
        response = requester.post(
            constants.PROJECT_TRACE_MAPPING_BULK_UPSERT_VERBOSE_URL, batch)

        successes.append(response.status_code == 201)

    return all(successes)
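
Because this loader resolves server-side primary keys via loaded_trace_ids and loaded_project_ids, projects and traces must be loaded before the mapping. A hypothetical end-to-end run (paths are made up):

# Implied load order; the mapping loader needs projects and traces to
# already exist server-side so it can look up their primary keys.
with open("projects.csv") as f:
    assert bulk_load_project_csv(f)
with open("traces.csv") as f:
    assert bulk_load_trace_csv(f)
with open("project_trace_mapping.csv") as f:
    assert bulk_load_project_trace_mapping_csv(f)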