def get_company_settings():
    """Return the ``ce`` payload of every active company, keyed by document id.

    Runs ``SELECT META().id,* FROM ce WHERE type=$1`` with ``3`` as the
    positional parameter against ``cb`` and keeps only the rows whose ``ce``
    payload has ``companyState == 'ACTIVE'``.
    """
    statement = 'SELECT META().id,* FROM ce WHERE type=$1'
    rows = N1QLRequest(_N1QLQuery(statement, 3), cb)
    return {
        row['id']: row['ce']
        for row in rows
        if row['ce'].get('companyState') == 'ACTIVE'
    }
def read_company_settings(limit=10, offset=0):
    """Return the document ids of all ``type=3`` rows read through ``cb_read``.

    ``limit`` and ``offset`` are accepted but are not applied: the statement
    that is executed carries no LIMIT/OFFSET clause, so every matching row is
    returned. The query timeout is set to 8000 before the request is issued.
    """
    n1ql = _N1QLQuery('SELECT META().id,* FROM ce WHERE type=$1', 3)
    n1ql.timeout = 8000
    return [row['id'] for row in N1QLRequest(n1ql, cb_read)]
Beispiel #3
0
    def to_n1ql_query(self, statement, *options, **kwargs):
        """Build an ``_N1QLQuery`` for ``statement`` from these options.

        The stored options are merged with ``kwargs`` (``kwargs`` win) on a
        copy, so this object itself is left unchanged.

        :param statement: the N1QL statement text.
        :param options: positional query parameters. When any are given they
            replace the ``positional_parameters`` option entirely.
        :param kwargs: extra options. Keys that are not in ``VALID_OPTS`` are
            sent as named parameters and override same-named entries of the
            ``named_parameters`` option.
        :return: the configured ``_N1QLQuery``.
        """
        # Work on a copy so kwargs never leak into the stored options.
        args = self.copy()
        args.update(kwargs)

        # Actual positional params OVERRIDE 'positional_parameters'.
        positional_parameters = args.pop('positional_parameters', [])
        if options:
            positional_parameters = options

        # Pop 'named_parameters' *before* collecting the extra keys so the
        # option itself can never be mistaken for a named parameter, and copy
        # it: self.copy() is presumably shallow, so the popped dict would
        # otherwise still be the one held by self and per-call kwargs would
        # accumulate in it across calls.
        named_parameters = dict(args.pop('named_parameters', None) or {})
        # Every remaining key that is not a VALID_OPTS entry is a named
        # parameter; these OVERRIDE the entries of 'named_parameters'.
        for key in [k for k in args if k not in self.VALID_OPTS]:
            named_parameters[key] = args[key]

        query = _N1QLQuery(statement, *positional_parameters,
                           **named_parameters)

        # Metrics default to off unless explicitly requested.
        query.metrics = args.get('metrics', False)

        for k in self.VALID_OPTS:
            v = args.get(k)
            # Only None means "not set". Testing truthiness here would drop
            # explicit falsy values such as adhoc=False or read_only=False.
            if v is None:
                continue
            if k == 'scan_consistency':
                query.consistency = v.value
            elif k == 'consistent_with':
                query.consistent_with = v
            elif k == 'adhoc':
                query.adhoc = v
            elif k == 'timeout':
                # .total_seconds() implies a timedelta-like value is expected.
                query.timeout = v.total_seconds()
            elif k == 'scan_cap':
                query.scan_cap = v
            elif k == 'pipeline_batch':
                query.pipeline_batch = v
            elif k == 'pipeline_cap':
                query.pipeline_cap = v
            elif k == 'read_only':
                query.readonly = v
            elif k == 'profile':
                query.profile = v.value
        return query
def read_media_by_company(comp_id, offset=0):
    """Page through a company's media rows and append them to a CSV file.

    Rows are fetched 100 at a time, ordered by id. A row whose ``id`` converts
    to ``int`` is appended (JSON-encoded) to ``downloaded_<comp_id>.csv``;
    any other row is written to ``string_ids_<comp_id>.csv`` instead.

    :param comp_id: company id used as the ``companyId`` query parameter.
    :param offset: starting row offset. It is replaced by
        ``get_offset(file)`` when the download file already exists.
    :return: ``None``. If a page fails, ``[comp_id, offset]`` is recorded via
        ``failed_db_writer`` and the function returns early.
    """
    print(f'Reading company data for company - {comp_id}')
    path = get_dir('downloaded', sub_dir)
    file = os.path.join(path, f'downloaded_{comp_id}.csv')
    if os.path.exists(file):
        offset = get_offset(file)
    fl = get_dir('string_ids', sub_dir)
    limit = 100
    cb.n1ql_timeout = 8000
    # Both files are managed by the same ``with`` so they are closed on every
    # exit path, including the early return taken when a page fails.
    with open(f'{fl}/string_ids_{comp_id}.csv', 'w') as str_ids, \
            open(file, 'a') as f:
        str_writer = csv.writer(str_ids)
        csv_writer = csv.writer(f)
        while True:
            media_records = []
            fetched = 0
            try:
                query = _N1QLQuery(
                    'SELECT META().id, * FROM ce WHERE companyId=$1 and type in $2 order by id LIMIT $3 OFFSET $4',
                    comp_id, mediaTypes, limit, offset)
                query.timeout = 8000
                for media_obj in N1QLRequest(query, cb):
                    fetched += 1
                    try:
                        # Only rows with an integer-convertible id are kept.
                        int(media_obj['id'])
                    except (KeyError, TypeError, ValueError):
                        str_writer.writerow([json.dumps(media_obj)])
                        continue
                    media_records.append([json.dumps(media_obj)])

                # Stop only when the page itself is empty. A page made up
                # entirely of non-numeric ids leaves media_records empty but
                # does not mean the result set is exhausted.
                if fetched == 0:
                    break

                print(
                    f'Successfully read medias from db - {comp_id} - offset - {offset} - limit - {limit}'
                )
                csv_writer.writerows(media_records)
                offset += limit

            except Exception:
                # Best effort: record where this company failed and give up
                # on it rather than aborting the caller.
                failed_db_writer.writerow([comp_id, offset])
                print(
                    f"Exception reading medias from db - {comp_id} - offset - {offset}"
                )
                return

    print(f'Finished Reading company data for company - {comp_id}')
def read_company_settings_from_db():
    """Dump every active company's settings to ``company_settings_<sub_dir>.csv``.

    Queries ``ce`` for rows with ``type=3`` through ``cb``, keeps those whose
    ``ce`` payload has ``companyState == 'ACTIVE'``, and writes one CSV row per
    company: the document id followed by the JSON-encoded payload.
    """
    print('Reading company settings from db...')
    rows = N1QLRequest(
        _N1QLQuery('SELECT META().id,* FROM ce WHERE type=$1', 3), cb)
    active = {
        row['id']: row['ce']
        for row in rows
        if row['ce'].get('companyState') == 'ACTIVE'
    }

    out_name = f'company_settings_{sub_dir}.csv'
    with open(out_name, 'w') as out:
        csv.writer(out).writerows(
            [company_id, json.dumps(payload)]
            for company_id, payload in active.items()
        )
    print('Successfully read company settings')
def get_media_count_by_company(company_settings, comp_type):
    """Write per-company media counts to ``<comp_type>_companies_cnt.csv``.

    For each company returned by ``get_companies_by_type`` a ``count(*)``
    query restricted to ``mediaTypes`` is run through ``cb``. The ``cnt``
    field of the first result row is used, defaulting to 0 when absent. Rows
    are written in ascending order of count.
    """
    statement = 'SELECT count(*) as cnt FROM ce WHERE companyId=$1 and type in $2'
    counts = {}
    for company in get_companies_by_type(company_settings, comp_type):
        request = N1QLRequest(_N1QLQuery(statement, company, mediaTypes), cb)
        first_row = list(request)[0]
        counts[company] = first_row.get('cnt', 0)

    with open(f'{comp_type}_companies_cnt.csv', 'w') as out:
        ranked = sorted(counts.items(), key=lambda pair: pair[1])
        csv.writer(out).writerows([company, cnt] for company, cnt in ranked)