Code example #1
def main_loop():
    # requested query
    Service = request.query.Service
    # Convert the Service value to a string
    Service = str(Service)

    csv = petl.fromcsv(file)
    response.headers['Content-type'] = 'application/json'
    response.headers['Access-Control-Allow-Origin'] = '*'

    for i in csv:
        if Service == i[1]:
            # select the data according to the given requested query
            dataSelect = petl.select(csv, "{Service} == '" + Service + "'")
            # cutting out the required column names
            jsonData = petl.cut(dataSelect, 'ClinicID', 'Suburb', 'Lat', 'Lon')
            # convert the dictionary data into json data
            jsonData = json.JSONEncoder().encode(list(petl.dicts(jsonData)))
            # return the json data
            return jsonData

        # requested endpoint: get the distinct list of clinics offering any service
        if Service == "0":
            anyServices = petl.unique(csv, key='Name')
            jsonData = petl.cut(anyServices, 'ClinicID', 'Suburb', 'Lat',
                                'Lon')
            jsonData = json.JSONEncoder().encode(list(petl.dicts(jsonData)))
            return jsonData
    else:
        jsonData = json.JSONEncoder().encode('Please Enter a Service.')
        return jsonData
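
Examples #1 and #2 (and several others below) share the same petl pattern: filter rows with select(), keep columns with cut(), then materialize dicts() for JSON output. A minimal self-contained sketch of that pattern, using a small in-memory table instead of the original CSV (column names and values are illustrative):

import json
import petl

# toy table standing in for petl.fromcsv(file)
table = [['ClinicID', 'Service', 'Suburb', 'Lat', 'Lon'],
         ['1', 'GP', 'Carlton', '-37.80', '144.96'],
         ['2', 'Dental', 'Fitzroy', '-37.79', '144.97']]

service = 'GP'
selected = petl.select(table, "{Service} == '" + service + "'")  # filter rows by field value
subset = petl.cut(selected, 'ClinicID', 'Suburb', 'Lat', 'Lon')  # keep only the needed columns
print(json.dumps(list(petl.dicts(subset))))                      # JSON-encode the row dicts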
Code example #2
def main_loop():
    # requested query
    inputServiceID = request.query.serviceid
    csv = pt.fromcsv('clinicservicelocations.csv')
    response.headers['Content-type'] = 'application/json'
    response.headers['Access-Control-Allow-Origin'] = '*'
    for i in csv:
        if inputServiceID == i[0]:
            # select the data according to the given requested query
            dataSelect = pt.select(
                csv, "{ServiceID} == '" + str(inputServiceID) + "'")
            # cutting out the required column names
            jsonData = pt.cut(dataSelect, 'Name', 'Service', 'Suburb', 'State',
                              'Email', 'Lat', 'Lon')
            # convert the dictionary data into json data
            jsonData = json.JSONEncoder().encode(list(pt.dicts(jsonData)))
            # return the json data
            return jsonData

        # requested endpoint: get the distinct list of clinics
        # offering any service
        if inputServiceID == "0":
            anyServices = pt.unique(csv, key='Name')
            jsonData = pt.cut(anyServices, 'Name', 'Service', 'Suburb',
                              'State', 'Email', 'Lat', 'Lon')
            jsonData = json.JSONEncoder().encode(list(pt.dicts(jsonData)))
            return jsonData
    else:
        jsonData = json.JSONEncoder().encode('Unable to find this id.')
        return jsonData
Code example #3
File: analysis.py  Project: ryantuck/lombardi
def analyze(season_bins, game_bins, season_param, game_param, position):

    # extract bucketed season / game performance
    gbs = game_buckets(game_param, game_bins, position)
    sbs = season_buckets(season_param, season_bins, position)

    # create probability distribution tables for seasons / games
    s_bucket_probs = bucket_dist(sbs)
    sg_bucket_probs = season_game_bucket_combo_probs(sbs, gbs)

    # generate likelihoods and prior
    s_bucket_dicts = list(petl.dicts(s_bucket_probs))
    sg_bucket_dicts = list(petl.dicts(sg_bucket_probs))
    s_buckets = sorted(set(s['bucket'] for s in s_bucket_dicts))

    likelihoods = {
        sb: bayes.Pdf({
            gb['g_bucket']: gb['prob']
            for gb in sg_bucket_dicts
            if gb['s_bucket'] == sb
        })
        for sb in s_buckets
    }

    prior = {s['bucket']: s['prob'] for s in s_bucket_dicts}

    # get appropriately-formed data
    data = data_dicts(sbs, gbs, game_param)

    # run analysis on data
    runs = conduct_runs(data, likelihoods, prior)

    # conduct rmse analysis overall and by bucket
    rmse_results = dict(
        overall=rmse_analysis(runs),
        by_bucket=dict(),
    )

    for sb in range(season_bins):
        tmp_runs = [r for r in runs if r['data']['season']['bucket'] == sb]
        rmse_results['by_bucket'][sb] = rmse_analysis(tmp_runs)

    # return all data!
    return dict(
        runs=runs,
        likelihoods={k:v.probs for k,v in likelihoods.items()},
        prior=prior,
        rmse_analysis=rmse_results,
    )
Code example #4
File: virtdb.py  Project: SergeVL/petlsql
    def dicts(self, sql):
        try:
            if isinstance(sql, str):
                sql = execute(sql, self)
            return etl.dicts(sql)
        except SQLError as e:
            print(e)
Code example #5
def drained_entries(ctx: typer.Context, issues, entries, project):
    config = ctx.meta['config']
    empty_entries, unset_entries = petl.biselect(
        entries, lambda row: row['issue_id'] is None)

    drain_issues = list(
        petl.dicts(
            transform.select_drain_issues(
                issues,
                assignee_id=ctx.meta['rdm_user']['id'],
                drain_cf_id=get_proj_attr(config, project,
                                          'rdm_drain_cf_id'))))

    if not len(drain_issues):
        log.error('No drain issues found')
        return petl.head(unset_entries, 0), entries

    if len(drain_issues) > 1:
        log.warning(
            f'Found {len(drain_issues)} drain issues. Will use only first one')

    drain_issue = drain_issues[0]
    drained = petl.addfield(petl.cutout(empty_entries, 'issue_id'), 'issue_id',
                            drain_issue['id'])
    return drained, unset_entries
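
For reference, petl.biselect (used above to split time entries) returns two tables in one pass: the rows matching the predicate and the remainder. A minimal sketch with a toy table (field names are illustrative):

import petl

entries = [['id', 'issue_id'], [1, None], [2, 42], [3, None]]
# split into (rows where issue_id is None, all other rows)
empty, unset = petl.biselect(entries, lambda row: row['issue_id'] is None)
print(list(petl.dicts(empty)))  # [{'id': 1, 'issue_id': None}, {'id': 3, 'issue_id': None}]
print(list(petl.dicts(unset)))  # [{'id': 2, 'issue_id': 42}]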
Code example #6
def anyServices():
    # requested query
    Postcode = request.query.Postcode
    # Convert the Postcode value to a string
    Postcode = str(Postcode)
    # reading the csv file
    csv = petl.fromcsv(file)

    # json content type declaration
    response.headers['Content-type'] = 'application/json'
    response.headers['Access-Control-Allow-Origin'] = '*'
    for i in csv:
        if Postcode == i[4]:
            # select the data according to the given requested query
            dataSelect = petl.select(csv, "{Postcode} == '" + Postcode + "'")
            # cutting out the required column names
            jsonData = petl.cut(dataSelect, 'Service', 'Suburb')
            # convert the dictionary data into json data
            jsonData = json.JSONEncoder().encode(list(petl.dicts(jsonData)))
            # return the json data
            return jsonData

    else:
        jsonData = json.JSONEncoder().encode('Unable to find this Service.')
        return jsonData
Code example #7
    def _get_record_identifiers(self, cfg):
        p = cfg.get('record_identifiers')
        if not (p and os.path.isfile(p)):
            p = os.path.join(cfg['root'], 'record_ids.json')

        table = petl.fromjson(p)
        return petl.dicts(table)
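
petl.fromjson (used above) expects the file to contain a JSON array of objects, each becoming one row. A minimal round-trip sketch (the file name is illustrative):

import json
import petl

# write a small JSON document, then read it back as a petl table
with open('record_ids.json', 'w') as f:
    json.dump([{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}], f)

table = petl.fromjson('record_ids.json')
print(list(petl.dicts(table)))  # [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}]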
Code example #8
    def sustain_summary(self):
        success, result = self.clip_and_dissolve_esri_feature_layer(
            feature_layer_query_url=RAINWAYS_RESOURCES['sustain'],
            feature_layer_fields=['GI_Type'],
            clipping_mask_gdf=self.aoi_gdf,
            clipping_mask_bbox=self.aoi_bbox,
            in_epsg_code=self.aoi_gdf_epsg)
        if success:
            # post-process the dataframe; add additional field
            t = etl\
                .fromdataframe(result)\
                .cutout('geometry')\
                .addfield('area_acres', lambda r: r['area'] * 0.00002295682)

            # convert to list of dictionaries
            results = list(etl.dicts(t))

            self.results.sustain.extend(results)

            return results

        else:
            self.status = 'failed'
            messages = result['error']['details'] + result['message']
            self.messages.extend(messages)

            return None
Code example #9
File: batch_upsert.py  Project: sshd123/study-python3
def upsert_many(table, rows, keys, ensure=None, types=None):
    if isinstance(rows, Table):
        rows = etl.dicts(rows)
    elif isinstance(rows, DataFrame):
        rows = rows.to_dict(orient='records')
    for row in rows:
        table.upsert(row, keys, ensure, types)
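
A hedged usage sketch of upsert_many, assuming `Table` in the snippet refers to petl's Table class and that `table` is a table from the dataset library (whose upsert(row, keys, ensure, types) call matches the one above); the database URL and field names are illustrative:

import dataset
import petl as etl

db = dataset.connect('sqlite:///example.db')
rows = etl.wrap([['email', 'name'],
                 ['a@example.com', 'Alice'],
                 ['b@example.com', 'Bob']])

# insert-or-update each row, matching existing rows on the 'email' column
upsert_many(db['users'], rows, keys=['email'])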
Code example #10
File: logins.py  Project: PGower/Unsync
def update_user_logins(data, url, api_key, source, account_id_field,
                       login_id_field, unique_id_field, password_field,
                       sis_user_id_field, integration_id_field, results_table):
    # prepend a scheme only when the URL does not already have one
    if not url.startswith('http://') and not url.startswith('https://'):
        url = 'https://' + url

    client = LoginsAPI(url, api_key)

    source = data.get(source)

    debug = data.config.debug

    results = []

    for row in petl.dicts(source):
        account_id = row[account_id_field]
        login_id = row[login_id_field]

        kwargs = {}
        if unique_id_field is not None and row[unique_id_field] is not None:
            kwargs['login_unique_id'] = row[unique_id_field]
        if password_field is not None and row[password_field] is not None:
            kwargs['login_password'] = row[password_field]
        if sis_user_id_field is not None and row[sis_user_id_field] is not None:
            kwargs['login_sis_user_id'] = row[sis_user_id_field]
        if integration_id_field is not None and row[
                integration_id_field] is not None:
            kwargs['login_integration_id'] = row[integration_id_field]

        try:
            r = client.edit_user_login(login_id, account_id, **kwargs)
            unsync.secho('Successfully updated login: {} with data: {}'.format(
                login_id, str(kwargs)),
                         fg='green')

            if results_table:
                row['_data'] = str(kwargs)
                row['_response_status'] = r
                row['_response_content'] = r
                results.append(row)

            if debug:
                unsync.secho(str(r), fg='yellow')
        except (CanvasAPIError) as e:
            unsync.secho('Failed updating login: {} with data: {}'.format(
                login_id, str(kwargs)),
                         fg='red')
            unsync.secho('Response Status: {} Response Reason: {}'.format(
                e.response.status_code, e.response.content),
                         fg='red')

            if results_table:
                row['_data'] = str(kwargs)
                row['_response_status'] = e.response.status_code
                row['_response_content'] = e.response.content
                results.append(row)

    results = petl.fromdicts(results)
    data.cat(results_table, results)
Code example #11
def precip_table_etl_noaa(
    precip_table,
    rainfall_adjustment=1,
    frequency_min=1,
    frequency_max=1000,
    conversion_factor=2.54,
    desc_field="by duration for ARI (years):",
    duration_val="24-hr:"
    ):
    """
    Extract, Transform, and Load data from a NOAA PRECIPITATION FREQUENCY
    ESTIMATES matrix (in a csv) into an array used by the runoff calculator.
    
    Required Inputs:
        - precip_table: NOAA PRECIPITATION FREQUENCY ESTIMATES csv, in inches.
    Optional Inputs:
        - rainfall_adjustment: multiplier to adjust for future rainfall
            conditions. Defaults to 1.
        - frequency_min: the min. annual frequency to be returned. Default: 1
        - frequency_max: the max. annual frequency to be returned. Default: 1000
        - conversion_factor: apply to rainfall values. Default: 2.54
            (convert inches to centimeters).
        - desc_field: exact field name from NOAA table in first column.
            Defaults to "by duration for ARI (years):". Used for selecting
            data.
        - duration_val: exact row value in the desc_field from NOAA table that
            contains the duration of interest. Defaults to "24-hr:". Used for
            selecting data.
    Outputs:
        - precip_array: 1D array containing 24-hour duration estimates for
            frequencies 1, 2, 5, 10, 25, 50, 100, 200, 500, and 1000 year storm events
    """
    # load the csv table, skip the file header information, extract rows we need
    t1 = etl\
        .fromcsv(precip_table)\
        .skip(13)\
        .rowslice(0,19)
    # grab raw data from the row containing the x-hour duration event info
    t2 = etl\
        .select(t1, desc_field, lambda v: v == duration_val)\
        .cutout(desc_field)
    # generate a new header with only columns within frequency min/max
    h = tuple(
        i
        for i in list(etl.header(t2))
        if (int(i) >= frequency_min and int(i) <= frequency_max)
    )

    # for events within freq range, convert to cm, adjust for future rainfall
    t3 = etl\
        .cut(t2, h)\
        .convertall(lambda v: round(float(v) * conversion_factor * rainfall_adjustment, 2))
    # convert to a 1D array (values cast to floats)
    precips = list(etl.data(t3)[0])
    # also convert to a dictionary, for lookup by event
    precips_lookup = list(etl.dicts(t3))[0]
    # return 1D array and dictionary
    return precips, precips_lookup
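
A hedged usage sketch of precip_table_etl_noaa (the CSV file name and the printed values are illustrative only; the function expects a standard NOAA precipitation-frequency CSV export in inches):

precip_array, precip_lookup = precip_table_etl_noaa(
    precip_table='noaa_precip_freq.csv',  # NOAA PF estimates export
    rainfall_adjustment=1.1,              # bump rainfall 10% for a future scenario
    frequency_min=1,
    frequency_max=100,
)
print(precip_array)   # e.g. [7.14, 8.43, ...] 24-hr values in centimeters
print(precip_lookup)  # e.g. {'1': 7.14, '2': 8.43, ...} keyed by return period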
Code example #12
File: tofrom.py  Project: margotw40/parsons
    def to_dicts(self):
        """
        Output table as a list of dicts.

        `Returns:`
            list
        """

        return list(petl.dicts(self.table))
Code example #13
def _medical_limits(id, source_db):
    """
    get the member limits
    """
    sql = ("SELECT dispensary_id, daily_purchase_limit, visit_purchase_limit, "
           "daily_visit_limit, two_week_purchase_limit "
           "FROM red_flags "
           "WHERE dispensary_id={0}").format(id)

    data = etl.fromdb(source_db, sql)
    limits = etl.select(data, lambda rec: rec.dispensary_id == id)
    return etl.dicts(limits)
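
etl.fromdb (used above) works with any DB-API connection and a query string. A minimal sketch with an in-memory SQLite database (table and column names are illustrative):

import sqlite3
import petl as etl

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE red_flags (dispensary_id INTEGER, daily_purchase_limit REAL)')
conn.execute('INSERT INTO red_flags VALUES (1, 28.0)')

data = etl.fromdb(conn, 'SELECT * FROM red_flags WHERE dispensary_id = 1')
print(list(etl.dicts(data)))  # [{'dispensary_id': 1, 'daily_purchase_limit': 28.0}]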
Code example #14
def anyServices():
    # reading the csv file
    csv = pt.fromcsv('services.csv')
    # json content type declaration
    response.headers['Content-type'] = 'application/json'
    response.headers['Access-Control-Allow-Origin'] = '*'
    # cutting out the required column names
    jsonData = pt.cut(csv, 'ServiceID', 'Service')
    # convert the dictionary data into json data
    jsonData = json.JSONEncoder().encode(list(pt.dicts(jsonData)))
    # returning the json data
    return jsonData
Code example #15
File: table.py  Project: kcym-3c/parsons
    def __getitem__(self, index):

        self._index_count += 1
        if self._index_count >= DIRECT_INDEX_WARNING_COUNT:
            logger.warning("""
                You have indexed directly into this Table multiple times. This can be inefficient,
                as data transformations you've made will be computed _each time_ you index into the
                Table. If you are accessing many rows of data, consider switching to this style of
                iteration, which is much more efficient:
                `for row in table:`
                """)

        return petl.dicts(self.table)[index]
Code example #16
def _get_taxes(id, source_db):
    """
    get the dispensary taxes settings for each dispensary_id
    """
    sql = ("SELECT DISTINCT dispensary_id, amount, name "
           "FROM taxes "
           "WHERE dispensary_id={0}").format(id)

    data = etl.fromdb(source_db, sql)
    try:
        lookup_taxes = etl.select(data, lambda rec: rec.dispensary_id == id)
        return etl.dicts(lookup_taxes)
    except KeyError:
        return 0
Code example #17
File: analysis.py  Project: ryantuck/lombardi
def data_dicts(sbs, gbs, game_param):

    season_dicts = petl.dicts(sbs)
    game_dicts = petl.dicts(gbs)

    data = []

    for s in season_dicts:
        gd = [
            {
                k:v for k,v in x.items()
                if k in [game_param, 'week', 'bucket']
            }
            for x in game_dicts
            if x['year'] == s['year']
            and x['name'] == s['name']
        ]

        data.append(dict(
            season=s,
            games=sorted(gd, key=lambda r: r['week']),
        ))

    return data
Code example #18
    def etl(self, record_id):
        for model in self.__models__:
            location_table = self.extract(model, record_id)
            nrows = petl.nrows(location_table)

            if nrows == 1:
                record = petl.dicts(location_table)[0]
                if self._has_observations(record):
                    self._added = True
                    location_id = self._post_location(record, model)
                    thing_id = self._post_thing(record, model, location_id)

                    self.add_package(record)
                    self.observation.etl(tids=self._make_tids(thing_id, record),
                                         models=(model,))
            else:
                print(f'multiple records found for the given record_id. Skipping {record_id}')
Code example #19
def dictFromTable():

    insights = {}
    #number of applications filed
    insights['Building Permits Filed'] = 0
    #number of commercial in the works
    insights['Commercial Project Count'] = 0
    #number of residential in the works
    insights['Residential Project Count'] = 0

    listings = []

    # print "in here "
    years = ['2016']
    quarters = ['Q1', 'Q2', 'Q3', 'Q4']

    tableList = openFileTable(years, quarters)

    for table in tableList:
        #transforms into dictionary

        d = etl.dicts(table)
        # print etl.look(table)
        #list of dictionaries
        b = list(d)

        for i in range(0, len(b)):
            #print b[i]
            if 'BP FILED' == b[i]['BESTSTAT']:
                insights['Building Permits Filed'] = insights[
                    'Building Permits Filed'] + 1

            if 'Resident' == b[i]['PROJECT_TYPE']:
                insights['Residential Project Count'] = insights[
                    'Residential Project Count'] + 1

            if 'Mixed' == b[i]['PROJECT_TYPE']:
                insights['Commercial Project Count'] = insights[
                    'Commercial Project Count'] + 1

            listings.append(b[i])

    listings.insert(0, insights)

    return listings
Code example #20
def process_animal_extended(shelter_id, session, input_directory):
    table = petl.fromxls(os.path.join(input_directory,
                                      'AnimalIntakeExtended.xls'),
                         sheet='AnimalIntakeExtended')

    ## Because an animal can appear in the intake report more than once,
    ## we must sort the table in order to upsert the latest value
    table_sorted = petl.sort(table, key='Intake Date/Time')

    for row in petl.dicts(table_sorted):
        id = row['Animal ID']

        set_values = {
            'arn': normalize_string(row['ARN']),
            'name': normalize_string(row['Animal Name']),
            'species': normalize_string(row['Species']),
            'primary_breed': normalize_string(row['Primary Breed']),
            'secondary_bred': normalize_string(row['Secondary Breed']),
            'gender': normalize_string(row['Gender']),
            'pre_altered': to_bool(row['Pre Altered']),
            'altered': to_bool(row['Altered']),
            'primary_color': normalize_string(row['Primary Colour']),
            'secondary_color': normalize_string(row['Secondary Colour']),
            'third_color': normalize_string(row['Third Colour']),
            'color_pattern': normalize_string(row['Colour Pattern']),
            'second_color_pattern':
            normalize_string(row['Second Colour Pattern']),
            'size': normalize_string(row['Size'])
        }

        insert_stmt = insert(Animal)\
            .values(
                id=id,
                shelter_id=shelter_id, ## TODO: add to index for constraint? make composite pk?
                **set_values)\
            .on_conflict_do_update(
                constraint='animals_pkey',
                set_={
                    'shelter_id': shelter_id,
                    **set_values,
                    'updated_at': func.now()
                })

        session.execute(insert_stmt)
        session.commit()
Code example #21
File: observations.py  Project: NMBGMR/WDIETL
    def _add_observations(self, datastream_id, records, model):
        if not isinstance(records, list):
            records = petl.dicts(records)

        for wti in tqdm(records):

            t = self._timestamp_extract(wti[model.timestamp_column])

            t = MT_TIMEZONE.localize(t)
            v = wti[model.mapped_column]
            if v is not None:
                payload = {
                    'phenomenonTime': t.isoformat(timespec='milliseconds'),
                    'resultTime': t.isoformat(timespec='milliseconds'),
                    'result': v,
                    'Datastream': make_id(datastream_id)
                }
                self._post_item(f'Observations', payload)
Code example #22
    def row_data(self, row_index):
        """
        Returns a row in table

        `Args:`
            row_index: int
        `Returns:`
            dict
                A dictionary of the row with the column as the key and the cell
                as the value.
        """

        self._index_count += 1
        if self._index_count >= DIRECT_INDEX_WARNING_COUNT:
            logger.warning("""
                You have indexed directly into this Table multiple times. This can be inefficient,
                as data transformations you've made will be computed _each time_ you index into the
                Table. If you are accessing many rows of data, consider switching to this style of
                iteration, which is much more efficient:
                `for row in table:`
                """)

        return petl.dicts(self.table)[row_index]
Code example #23
File: test_util.py  Project: pombredanne/petl
def test_dicts_shortrows():
    table = (('foo', 'bar'), ('a', 1), ('b', ))
    actual = dicts(table)
    expect = ({'foo': 'a', 'bar': 1}, {'foo': 'b', 'bar': None})
    ieq(expect, actual)
Code example #24
File: test_util.py  Project: talwai/petl
def test_dicts():
    table = (("foo", "bar"), ("a", 1), ("b", 2))
    actual = dicts(table)
    expect = ({"foo": "a", "bar": 1}, {"foo": "b", "bar": 2})
    ieq(expect, actual)
Code example #25
File: test_util.py  Project: pombredanne/petl
def test_dicts():
    table = (('foo', 'bar'), ('a', 1), ('b', 2))
    actual = dicts(table)
    expect = ({'foo': 'a', 'bar': 1}, {'foo': 'b', 'bar': 2})
    ieq(expect, actual)
Code example #26
File: import_qa_data.py  Project: baggids/qcat
    def collect_import_objects(self):
        """
        Query and put together all QA objects which will be imported.
        """

        def get_tables(mappings):
            """
            Recursively collect all WOCAT tables of the mappings.

            Args:
                mappings: list.

            Returns:
                list. A list of tables.
            """
            tables = []
            for mapping in mappings:
                table = mapping.get('wocat_table')
                if table:
                    tables.append(table)
                tables.extend(get_tables(mapping.get('mapping', [])))
                tables.extend(get_tables(mapping.get('conditions', [])))
            return tables

        self.output('Fetching data from WOCAT QA database.', v=1)

        # Extend the default tables by adding the ones from the mapping.
        tables = self.default_tables
        for qg_properties in self.mapping.values():
            questions = qg_properties.get('questions', {})
            for q_properties in questions.values():
                tables.extend(get_tables(q_properties.get('mapping', [])))

        # Remove duplicates
        tables = list(set(tables))

        # Try to query the lookup table and collect its values.
        try:
            lookup_query = """
                    SELECT *
                    FROM {schema}.{table_name};
                """.format(schema=self.schema,
                           table_name=self.lookup_table_name)
            lookup_table = {}
            for row in petl.dicts(petl.fromdb(self.connection, lookup_query)):
                lookup_table[row.get('id')] = row
        except AttributeError:
            lookup_table = {}

        # So far, lookup_text is never used. Therefore it can be left empty.
        lookup_table_text = {}

        # Try to query file infos
        try:
            lookup_query_files = """
                    SELECT *
                    FROM {schema}.{table_name};
                """.format(schema=self.schema,
                           table_name=self.file_info_table)
            file_infos = {}
            for row in petl.dicts(
                    petl.fromdb(self.connection, lookup_query_files)):
                file_infos[row.get('blob_id')] = row
        except AttributeError:
            file_infos = {}

        for table_name in tables:
            query = 'SELECT {columns} FROM {schema}.{table_name};'.format(
                columns='*', schema=self.schema, table_name=table_name)

            queried_table = petl.fromdb(self.connection, query)
            row_errors = False
            for row in petl.dicts(queried_table):

                if row_errors is True:
                    continue

                # Inconsistent naming throughout the tables
                questionnaire_identifier = self.questionnaire_identifier
                if table_name == 'approach':
                    questionnaire_identifier = 'id'
                elif table_name == 'qa_quality_review':
                    questionnaire_identifier = 'qa_id'

                identifier = row.get(questionnaire_identifier)
                if identifier is None:
                    self.output('No identifier found for table "{}".'.format(
                        table_name), v=1, l='error')
                    row_errors = True

                if identifier in self.import_objects_exclude:
                    continue

                import_object = self.get_import_object(identifier)

                if import_object is None:
                    import_object = QAImportObject(
                        identifier, self.command_options, lookup_table,
                        lookup_table_text, file_infos, self.image_url)

                    import_object.add_custom_mapping_messages(
                        self.custom_mapping_messages)

                    self.import_objects.append(import_object)

                # Set the code if it is available in the current table
                code = row.get(self.questionnaire_code)
                if code:
                    import_object.set_code(code)

                # The main contributor is the compiler
                compiler_id = row.get(self.questionnaire_owner)

                if compiler_id:
                    # If the main contributor is "Not registered" (ID 661), use
                    # the default compiler
                    if compiler_id == 661:
                        compiler_id = self.default_compiler_id
                        import_object.add_mapping_message(
                            'Using "Unknown User" as compiler in QCAT as main '
                            'contributor in QA was "Not registered"')

                    # The following QAs have a main contributor which is not
                    # available through the API call. Set the default user and
                    # add a mapping message.
                    elif identifier in [131, 128, 89, 47, 106, 82, 195, 212,
                                        76, 107, 84, 139, 130, 276, 72, 147,
                                        138, 43, 44, 46, 49, 50, 52, 57, 173,
                                        171, 170, 166, 125, 78, 102, 45, 197,
                                        48]:
                        compiler_id = self.default_compiler_id
                        import_object.add_mapping_message(
                            'The compiler needs to be set manually. Use the '
                            'main contributor of QA.')

                    import_object.set_owner(compiler_id)

                # Use the creation date available on the approach table
                created = row.get('date')
                if created and table_name == 'approach':
                    creation_time = datetime.strptime(
                        created, WOCAT_DATE_FORMAT)
                    import_object.created = timezone.make_aware(
                        creation_time, timezone.get_current_timezone())

                import_object.add_wocat_data(table_name, row)
Code example #27
File: base.py  Project: trb116/pythonanalyzer
etl.header(table)

# data()
########

import petl as etl
table = [['foo', 'bar'], ['a', 1], ['b', 2]]
d = etl.data(table)
list(d)

# dicts()
#########

import petl as etl
table = [['foo', 'bar'], ['a', 1], ['b', 2]]
d = etl.dicts(table)
d
list(d)

# namedtuples()
###############

import petl as etl
table = [['foo', 'bar'], ['a', 1], ['b', 2]]
d = etl.namedtuples(table)
d
list(d)

# records()
###############
Code example #28
File: test_util.py  Project: talwai/petl
def test_dicts_shortrows():
    table = (("foo", "bar"), ("a", 1), ("b",))
    actual = dicts(table)
    expect = ({"foo": "a", "bar": 1}, {"foo": "b", "bar": None})
    ieq(expect, actual)
Code example #29
File: test_util.py  Project: brutimus/petl
def test_dicts():
    table = (('foo', 'bar'), ('a', 1), ('b', 2))
    actual = dicts(table)
    expect = ({'foo': 'a', 'bar': 1}, {'foo': 'b', 'bar': 2})
    ieq(expect, actual)
Code example #30
    def __iter__(self):

        return iter(petl.dicts(self.table))
Code example #31
File: test_util.py  Project: brutimus/petl
def test_dicts_shortrows():
    table = (('foo', 'bar'), ('a', 1), ('b',))
    actual = dicts(table)
    expect = ({'foo': 'a', 'bar': 1}, {'foo': 'b', 'bar': None})
    ieq(expect, actual)
Code example #32
File: base.py  Project: rogerkwoodley/petl
########

import petl as etl

table = [["foo", "bar"], ["a", 1], ["b", 2]]
d = etl.data(table)
list(d)


# dicts()
#########

import petl as etl

table = [["foo", "bar"], ["a", 1], ["b", 2]]
d = etl.dicts(table)
d
list(d)


# namedtuples()
###############

import petl as etl

table = [["foo", "bar"], ["a", 1], ["b", 2]]
d = etl.namedtuples(table)
d
list(d)

Code example #33
def transform_and_aggregate_datetimes(query_results, rollup):
    """transform datetime to the correct TZ; aggregate the values in the query results 
    based on the datetime rollup args. Aggregation is performed for:

    * hourly or daily time intervals
    * total

    NOTE: in order to handle potential No-Data values in the DB during aggregation, we
    convert them to 0. The `src` field then indicates if any values in the rollup were N/D.
    Then, if the value field in the aggregated row still shows 0 after summation, *and*
    the src field shows N/D, we turn that zero into None. If there was a partial reading
    (e.g., the sensor has values for the first half hour but N/D for the second, and we are 
    doing an hourly rollup), then the values will stay there, but the source field will indicate
    both N/D and whatever the source was for the workable sensor values.

    TODO: move this work over to the database query

    """
    t1 = etl\
        .fromdicts(query_results)\
        .convert('xts', lambda v: v.astimezone(TZ).isoformat(), failonerror=True)
        #.rename('xts', 'ts')
    # print("t1")
    # print(t1)

    # print("rollup", rollup)
    if rollup in [INTERVAL_DAILY, INTERVAL_HOURLY]:

        petl_aggs = OrderedDict(
            val=('val', _sumround), # sum the rainfall vales
            src=('src', _listset) # create a list of all rainfall sources included in the rollup
        )

        t2 = etl\
            .convert(
                t1,
                'xts', 
                lambda v: _rollup_date(v, rollup), # convert datetimes to their rolled-up value in iso-format
                failonerror=True
            )\
            .convert(
                'val', 
                lambda v: 0 if v is None else v, # convert rainfall values to 0 if no-data
                failonerror=True
            )\
            .aggregate(
                ('xts', 'sid'), 
                petl_aggs # aggregate rainfall values (sum) and sources (list) for each timestamp+ID combo,
            )\
            .convert(
                'val', 
                lambda v, r: None if ('N/D' in r.src and v == 0) else v, # replace 0 values with no data if aggregated source says its N/D
                pass_row=True,
                failonerror=True
            )\
            .sort('sid')
            # .convert(
            #     'xts', 
            #     lambda v: TZ.localize(parse(v)).isoformat(), # convert that datetime to iso format w/ timezone
            #     failonerror=True
            # )
        # print("t2 time rollup")

    elif rollup in [INTERVAL_SUM]:

        petl_aggs = OrderedDict(
            val=('val', _sumround), # sum the rainfall vales
            src=('src', _listset), # create a list of all rainfall sources included in the rollup
            xts=('xts', _minmax) # create a iso datetime range string from the min and max datetimes found
        )

        t2 = etl\
            .aggregate(
                t1,
                'sid', 
                petl_aggs # aggregate rainfall values (sum) and sources (list), and datetimes (str) for each ID,
            )\
            .convert(
                'val', 
                lambda v, r: None if ('N/D' in r.src and v == 0) else v, # replace 0 values with no data if aggregated source says its N/D
                pass_row=True
            )\
            .sort('sid')

        # print("t2 sum")

    else:
        t2 = t1
    # print("t2 = t1")

    # print(t2)
    # h = etl.header(t2)

    # rename the timestamp and sensor id fields, 
    # print("t2 header:", list(etl.header(t2)))
    # rename_kw = {}
    # for h1, h0 in [('xts', 'ts'), ('sid', 'id')]:
    #     if h1 in h:
    #         rename_kw[h1] = h0
    # if len(rename_kw.items()) > 0:
    #     {'xts':'ts', 'sid':'id'}
    t3 = etl.rename(t2, {'xts':'ts', 'sid':'id'}, strict=False)
    # else:
    #     t3 = t2
    # print("t3")
    # print(t3)

    # convert to list of dicts and return
    return list(etl.dicts(t3))
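
The no-data handling described in the docstring reduces to a convert / aggregate / convert pattern; a minimal sketch of just that rule with toy data (field names follow the function above, everything else is illustrative):

from collections import OrderedDict
import petl as etl

rows = [{'sid': 'A', 'xts': '2020-01-01T00', 'val': None, 'src': 'N/D'},
        {'sid': 'A', 'xts': '2020-01-01T00', 'val': 0.1,  'src': 'gauge'}]

aggs = OrderedDict(
    val=('val', sum),                                  # sum the rainfall values
    src=('src', lambda vs: '/'.join(sorted(set(vs))))  # collect the sources
)
t = etl\
    .fromdicts(rows)\
    .convert('val', lambda v: 0 if v is None else v)\
    .aggregate(('xts', 'sid'), aggs)\
    .convert('val', lambda v, r: None if ('N/D' in r.src and v == 0) else v, pass_row=True)
print(list(etl.dicts(t)))  # the partial reading survives: val stays 0.1, src lists both sources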
Code example #34
File: menu_items.py  Project: tdelam/g1-etl
def transform(mmj_menu_items, mmj_categories, prices, organization_id,
              source_db, debug):
    """
    Transform data
    """
    # source data table
    source_dt = utils.view_to_list(mmj_menu_items)

    cut_menu_data = [
        'id', 'vendor_id', 'menu_id', 'dispensary_id', 'strain_id',
        'created_at', 'updated_at', 'category_id', 'name', 'sativa', 'indica',
        'on_hold', 'product_type', 'image_file_name', 'medicine_amount',
        'product_type'
    ]

    cut_prices = [
        'menu_item_id', 'dispensary_id', 'price_half_gram', 'price_gram',
        'price_two_gram', 'price_eigth', 'price_quarter', 'price_half',
        'price_ounce'
    ]

    # Cut out all the fields we don't need to load
    menu_items = etl.cut(source_dt, cut_menu_data)
    prices_data = etl.cut(prices, cut_prices)

    menu_items = (etl.addfield(
        menu_items, 'createdAtEpoch').addfield('unitOfMeasure').addfield(
            'locationProductDetails').addfield('keys').addfield('restockLevel')
                  )

    # Two-step transform and cut. First we need to cut the name
    # and id from the source data to map to.
    cut_source_cats = etl.cut(mmj_categories, 'name', 'id', 'measurement')
    source_values = etl.values(cut_source_cats, 'name', 'id')

    # Then we need a dict of categories to compare against.
    # id is stored to match against when transforming and mapping categories
    mmj_categories = dict([(value, id) for (value, id) in source_values])

    mappings = OrderedDict()
    mappings['id'] = 'id'
    mappings['createdAt'] = 'created_at'
    mappings['updatedAt'] = 'updated_at'
    mappings['createdAtEpoch'] = lambda x: utils.create_epoch(x.created_at)
    mappings['name'] = 'name'
    mappings['shareOnWM'] = lambda x: _wm_integration(x.id, source_db)
    """
    1 = Units
    2 = Grams (weight)
    """
    mappings['unitOfMeasure'] = \
        lambda x: _map_uom(x.category_id, source_db)

    fields = etl.fieldmap(menu_items, mappings)
    data = etl.merge(menu_items, fields, key='id')

    items = []
    for item in etl.dicts(data):

        breakpoint_pricing = (etl.select(
            prices_data,
            lambda x: x.dispensary_id == item['dispensary_id']).rename({
                'price_eigth':
                'price_eighth'
            }).cutout('menu_item_id'))
        # Set image url for load to download
        url = None
        if debug and item['image_file_name'] is not None:
            url = ("https://wm-mmjmenu-images-development.s3."
                   "amazonaws.com/menu_items/images/{0}/large/"
                   "{1}").format(item['id'], item['image_file_name'])
        elif item['image_file_name'] is not None:
            url = ("https://wm-mmjmenu-images-production.s3."
                   "amazonaws.com/menu_items/images/{0}/large/"
                   "{1}").format(item['id'], item['image_file_name'])

        item['image_file_name'] = url

        item['categoryId'] = _map_categories(item['category_id'],
                                             item['sativa'], item['indica'],
                                             mmj_categories, menu_items)
        item['keys'] = {
            'dispensary_id': item['dispensary_id'],
            'id': item['id'],
            'menu_id': item['menu_id'],
            'vendor_id': item['vendor_id'],
            'strain_id': item['strain_id'],
            'category_id': item['category_id']
        }

        # set a default netMJ value if the menu item is a unit product
        if item['unitOfMeasure'] == 2:
            item['netMarijuana'] = int(item['medicine_amount'])

        # drop empty values from the keys dict; copy the keys so we can
        # safely delete entries while iterating
        for key in list(item['keys'].keys()):
            if not item['keys'][key]:
                del item['keys'][key]

        item['locationProductDetails'] = {
            'id': item['id'],
            'active': _active(item['on_hold'])
        }

        item['restockLevel'] = _restock_level(item['dispensary_id'],
                                              item['product_type'], source_db)

        if item['shareOnWM'] is None:
            item['shareOnWM'] = False

        for price in etl.dicts(breakpoint_pricing):
            try:
                price_two_gram = price['price_two_gram']
            except KeyError:
                price_two_gram = 0.0

            item['locationProductDetails']['weightPricing'] = {
                'price_half_gram':
                utils.dollars_to_cents(price['price_half_gram']),
                'price_two_gram': utils.dollars_to_cents(price_two_gram),
                'price_gram': utils.dollars_to_cents(price['price_gram']),
                'price_eighth': utils.dollars_to_cents(price['price_eighth']),
                'price_quarter':
                utils.dollars_to_cents(price['price_quarter']),
                'price_half': utils.dollars_to_cents(price['price_half']),
                'price_ounce': utils.dollars_to_cents(price['price_ounce'])
            }

        del item['vendor_id']
        del item['indica']
        del item['dispensary_id']
        del item['id']
        del item['strain_id']
        del item['on_hold']
        del item['menu_id']
        del item['sativa']
        del item['category_id']
        del item['updated_at']
        del item['created_at']
        del item['product_type']

        if item['image_file_name'] is None:
            del item['image_file_name']

        # set up final structure for API
        items.append(item)

    # Remove inactive items (build a new list rather than removing items
    # from the list while iterating over it)
    items = [
        item for item in items
        if item['locationProductDetails']['active'] is not False
    ]

    if debug:
        result = json.dumps(items,
                            sort_keys=True,
                            indent=4,
                            default=utils.json_serial)
        print(result)

    return items
Code example #35
def transform(source_data, organization_id, debug):
    """
    Load the transformed data into the destination(s)
    """
    # source data table
    source_dt = utils.view_to_list(source_data)

    cut_data = [
        'id', 'dispensary_id', 'picture_file_name', 'name', 'email',
        'address', 'phone_number', 'dob', 'license_type', 'registry_no',
        'membership_id', 'given_caregivership', 'tax_exempt',
        'drivers_license_no', 'points', 'locked_visits',
        'locked_visits_reason', 'caregiver_id', 'picture_file_name',
        'card_expires_at', 'created_at', 'updated_at', 'physician_id',
        'custom_membership_id', 'organization_membership_id', 'city',
        'state', 'zip_code', 'address', 'organization_id'
    ]
    member_data = etl.cut(source_dt, cut_data)

    members = (
        etl
        .addfield(member_data, 'identificationType')
        .addfield('createdAtEpoch')
    )

    member_mapping = OrderedDict()

    member_mapping['id'] = 'id'
    member_mapping['caregiver_id'] = 'caregiver_id'
    member_mapping['dispensary_id'] = 'dispensary_id'
    member_mapping['physician_id'] = 'physician_id'
    member_mapping['custom_membership_id'] = 'custom_membership_id'
    member_mapping['organization_membership_id'] = 'organization_membership_id'
    member_mapping['picture_file_name'] = 'picture_file_name'
    member_mapping['dateOfBirth'] = 'dob'
    member_mapping['name'] = 'name'
    member_mapping['phone_number'] = 'phone_number'
    member_mapping['email'] = 'email'
    member_mapping['organization_id'] = 'organization_id'
    # MEDICAL 1, RECREATIONAL 2
    member_mapping['memberType'] = \
        lambda m: 'MEDICAL' if m.license_type == 1 else 'RECREATIONAL'

    member_mapping['mmjCard'] = 'registry_no'
    member_mapping['isCaregiver'] = \
        lambda x: utils.true_or_false(x.given_caregivership)
    member_mapping['identificationNumber'] = 'drivers_license_no'
    member_mapping['points'] = 'points'
    member_mapping['card_expires_at'] = 'card_expires_at'
    member_mapping['taxExempt'] = lambda x: utils.true_or_false(x.tax_exempt)
    member_mapping['locked_visits'] = 'locked_visits'
    member_mapping['locked_visits_reason'] = 'accountStatusNotes'
    member_mapping['address'] = 'address'
    member_mapping['city'] = 'city'
    member_mapping['zip_code'] = 'zip_code'
    member_mapping['state'] = 'state'
    member_mapping['createdAt'] = 'created_at'
    member_mapping['updatedAt'] = 'updated_at'

    member_mapping['accountStatus'] = \
        lambda x: utils.account_status(x.locked_visits)

    member_fields = etl.fieldmap(members, member_mapping)

    members = []
    for item in etl.dicts(member_fields):
        
        item['keys'] = {
            'id': item['id'],
            'caregiver_id': item['caregiver_id'],
            'dispensary_id': item['dispensary_id'],
            'physician_id': item['physician_id'],
            'custom_membership_id': item['custom_membership_id'],
            'organization_membership_id': item['organization_membership_id'],
            'picture_file_name': item['picture_file_name'],
            'organization_id': item['organization_id'],
        }

        if item['card_expires_at'] is not None:
            item['expiryDate'] = item['card_expires_at']

        # remove any item['keys'] entries with empty values; copy the keys so
        # we can safely delete entries while iterating
        for key in list(item['keys'].keys()):
            if not item['keys'][key]:
                del item['keys'][key]

        # set up final structure for API
        item['identificationType'] = 'Drivers License'

        # We may not need this in the data
        item['address'] = [{
            'line1': item['address'],
            'city': item['city'],
            'state': item['state'],
            'zip': item['zip_code'],
        }]

        # replace None value dobs with the epoch beginning of time
        if not item['dateOfBirth']:
            dob = time.strftime('%Y-%m-%d %H:%M:%S.000Z', time.gmtime(0))
            item['dateOfBirth'] = datetime.strptime(dob, '%Y-%m-%d %H:%M:%S.000Z')

        #del item['address']
        del item['city']
        del item['zip_code']
        del item['state']
        del item['dispensary_id']
        del item['id']
        del item['physician_id']
        del item['caregiver_id']
        del item['custom_membership_id']
        del item['organization_membership_id']
        del item['organization_id']
        del item['picture_file_name']
        del item['locked_visits_reason']
        del item['locked_visits']
        del item['card_expires_at']
        
        members.append(item)

    if debug:
        result = json.dumps(members, sort_keys=True,
                            indent=4, default=utils.json_serial)
        print(result)

    return members
Code example #36
    """Return a list of valid NUTS codes."""

    with open(GEOCODES_FILE) as stream:
        lines = csv.DictReader(stream)
        geocodes = []
        for i, line in enumerate(lines):
            # The first line has an empty NUTS-code
            if i > 0:
                geocode = line['NUTS-Code']
                geocodes.append(geocode)

    logging.debug('Loaded %d NUTS geocodes', len(geocodes))
    return tuple(geocodes)


GEOCODES = list(dicts(fromcsv(GEOCODES_FILE)))


def get_all_codelists():
    """Return all codelists as a dictionary of dictionaries."""

    codelists = {}

    for codelist_file in os.listdir(CODELISTS_DIR):
        codelist_name, _ = os.path.splitext(codelist_file)
        codelist = get_codelist(codelist_name)
        codelists.update({codelist_name: codelist})

    return codelists

Code example #37
def sales_summary(start_dt=None, end_dt=None):
    """tally up gross (sale over list) profits
    TODO: tally up net profits (gross profit vs inventory purchase total)

    TODO: Keyword Arguments:
        start_dt {[type]} -- datetime for start of query (default: {None})
        end_dt {[type]} -- datetime for end of query (default: {None})

    Returns:
        [dict] -- various types of sales information, stored in a dictionary.
    """

    # products = db.session.query(Product).all()
    # sales = db.session.query(Sale).all()

    # retrieve existing tables
    products_records = etl.fromdb(db.engine, 'SELECT * FROM product')
    sales_records = etl.fromdb(db.engine, 'SELECT * FROM sale')

    # join product info to sales data
    sales_data = etl.join(sales_records,
                          products_records,
                          lkey='product_id',
                          rkey='id')

    # prep joined sales data for tabulation
    sales_data = etl.convert(sales_data, 'date', lambda dt: format_date(dt))
    sales_data = etl.sort(sales_data, 'date')
    sales_data = etl.convert(sales_data, 'quantity',
                             lambda q: handle_none(q, replace_with=1))
    sales_data = etl.addfield(sales_data, 'profit',
                              lambda rec: calculate_profit(rec))
    sales_data = etl.addfield(sales_data, 'gross_sales',
                              lambda rec: calculate_gross_sales(rec))

    # summarize data into charting-friendly data structures
    chart_count = etl.fold(sales_data,
                           'date',
                           operator.add,
                           'quantity',
                           presorted=True)
    chart_count = etl.rename(chart_count, {'key': 'x', 'value': 'y'})
    chart_count, chart_count_missing_date = etl.biselect(
        chart_count, lambda rec: rec.x is not None)
    # print(chart_count)
    # etl.lookall(chart_count)

    chart_gross = etl.fold(sales_data,
                           'date',
                           operator.add,
                           'gross_sales',
                           presorted=True)
    chart_gross = etl.rename(chart_gross, {'key': 'x', 'value': 'y'})
    chart_gross, chart_gross_missing_date = etl.biselect(
        chart_gross, lambda rec: rec.x is not None)
    # print(chart_gross)
    # etl.lookall(chart_gross)

    chart_profit = etl.fold(sales_data,
                            'date',
                            operator.add,
                            'profit',
                            presorted=True)
    chart_profit = etl.rename(chart_profit, {'key': 'x', 'value': 'y'})
    chart_profit, chart_profit_missing_date = etl.biselect(
        chart_profit, lambda rec: rec.x is not None)

    # tabulate some figures
    gross_sales = 0
    profits = 0
    for sale in etl.dicts(sales_data):
        profits += calculate_profit(sale)
        gross_sales += calculate_gross_sales(sale)

    # for i in etl.dicts(chart_count):
    #     print(i)
    # for i in etl.dicts(chart_gross):
    #     print(i)

    return {
        'gross_sales': gross_sales,
        'profits': profits,
        'chart_gross': list(etl.dicts(chart_gross)),
        'chart_gross_missing_date': list(etl.dicts(chart_gross_missing_date)),
        'chart_profit': list(etl.dicts(chart_profit)),
        'chart_profit_missing_date':
        list(etl.dicts(chart_profit_missing_date)),
        'chart_count': list(etl.dicts(chart_count)),
        'chart_count_missing_date': list(etl.dicts(chart_count_missing_date))
    }
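
The charting series above are built with etl.fold, which reduces a value field over a key using a binary operator and emits ('key', 'value') rows. A minimal sketch with toy data (field names are illustrative):

import operator
import petl as etl

sales = [['date', 'quantity'], ['2021-01-01', 2], ['2021-01-01', 3], ['2021-01-02', 1]]
daily = etl.fold(etl.sort(sales, 'date'), 'date', operator.add, 'quantity', presorted=True)
daily = etl.rename(daily, {'key': 'x', 'value': 'y'})
print(list(etl.dicts(daily)))  # [{'x': '2021-01-01', 'y': 5}, {'x': '2021-01-02', 'y': 1}]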
Code example #38
    def __repr__(self):

        return repr(petl.dicts(self.table))