def _rebuild(self, csv_files=None):
    """
    Implement rebuild command

    :param csv_files: sequence of paths to .csv files for input
    :type csv_files: sequence of str

    :return: Nothing
    :rtype: None
    """
    self._clear_index()
    conn = solr_connection("ati")
    lc = LocalCKAN()
    if csv_files:
        for csv_file in csv_files:
            print csv_file + ":"
            for org_id, records in csv_data_batch(csv_file, TARGET_DATASET):
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print " {0:s} {1}".format(org_id, len(records))
                _update_records(records, org_detail, conn)
    else:
        for org_id in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org_id)
            for records in data_batch(org_detail["id"], lc, TARGET_DATASET):
                _update_records(records, org_detail, conn)
                count += len(records)
            print org_id, count
def _rebuild(self, csv_file=None):
    """
    Implement rebuild command

    :param csv_file: path to .csv file for input
    :type csv_file: str

    :return: Nothing
    :rtype: None
    """
    self._clear_index()
    conn = solr_connection(self.command_name)
    lc = LocalCKAN()
    if csv_file:
        count = {}
        for org_id, records in csv_data_batch(csv_file, self.command_name):
            if org_id not in count:
                count[org_id] = 0
            org_detail = lc.action.organization_show(id=org_id)
            _update_records(records, org_detail, conn, self.command_name)
            count[org_id] += len(records)
        for org_id in lc.action.organization_list():
            print org_id, count.get(org_id, 0)
    else:
        for org in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org)
            for records in data_batch(org_detail["id"], lc, self.command_name):
                _update_records(records, org_detail, conn, self.command_name)
                count += len(records)
            print org, count
def _rebuild(self, csv_file=None):
    """
    Implement rebuild command

    :param csv_file: path to .csv file for input
    :type csv_file: str

    :return: Nothing
    :rtype: None
    """
    conn = solr_connection('proactive_disclosure')
    lc = LocalCKAN()
    if csv_file:
        count = {}
        for org_recs in csv_data_batch(csv_file, TARGET_DATASET):
            org_id = org_recs.keys()[0]
            if org_id not in count:
                count[org_id] = 0
            org_detail = lc.action.organization_show(id=org_id)
            records = org_recs[org_id]
            _update_records(records, org_detail, conn)
            count[org_id] += len(records)
        for org_id in lc.action.organization_list():
            print org_id, count.get(org_id, 0)
    else:
        for org in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org)
            for records in data_batch(org_detail['id'], lc, TARGET_DATASET):
                _update_records(records, org_detail, conn)
                count += len(records)
            print org, count
def rebuild(command_name, csv_files=None, solr_url=None, strict=True):
    """
    Implement rebuild command

    :param csv_files: sequence of paths to .csv files for input
    :type csv_files: sequence of str

    :return: Nothing
    :rtype: None
    """
    clear_index(command_name, solr_url, False)
    conn = solr_connection(command_name, solr_url)
    lc = LocalCKAN()
    if csv_files:
        # index records from the given CSV dumps
        for csv_file in csv_files:
            print csv_file + ':'
            prev_org = None
            unmatched = None
            firstpart, filename = os.path.split(csv_file)
            assert filename.endswith('.csv')
            resource_name = filename[:-4]
            chromo = get_chromo(resource_name)
            geno = get_geno(chromo['dataset_type'])
            for org_id, records in csv_data_batch(csv_file, chromo, strict=strict):
                records = [
                    dict((k, safe_for_solr(v)) for k, v in row_dict.items())
                    for row_dict in records]
                if org_id != prev_org:
                    unmatched = None
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print " {0:s} {1}".format(org_id, len(records))
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
    else:
        # index records from datasets stored in CKAN
        for org in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org)
            unmatched = None
            for resource_name, records in data_batch(org_detail['id'], lc, command_name):
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
                count += len(records)
            print org, count
    print "commit"
    conn.commit()
def rebuild(command_name, csv_files=None, solr_url=None):
    """
    Implement rebuild command

    :param csv_files: sequence of paths to .csv files for input
    :type csv_files: sequence of str

    :return: Nothing
    :rtype: None
    """
    clear_index(command_name, solr_url, False)
    conn = solr_connection(command_name, solr_url)
    lc = LocalCKAN()
    if csv_files:
        for csv_file in csv_files:
            print csv_file + ':'
            prev_org = None
            unmatched = None
            firstpart, filename = os.path.split(csv_file)
            assert filename.endswith('.csv')
            resource_name = filename[:-4]
            chromo = get_chromo(resource_name)
            geno = get_geno(chromo['dataset_type'])
            for org_id, records in csv_data_batch(csv_file, chromo):
                records = [
                    dict((k, safe_for_solr(v)) for k, v in row_dict.items())
                    for row_dict in records]
                if org_id != prev_org:
                    unmatched = None
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print " {0:s} {1}".format(org_id, len(records))
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
    else:
        for org in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org)
            unmatched = None
            for resource_name, records in data_batch(org_detail['id'], lc, command_name):
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
                count += len(records)
            print org, count
    print "commit"
    conn.commit()
def _rebuild(self, csv_files=None, solr_url=None, strict=True):
    """
    Implement rebuild command

    :param csv_files: sequence of paths to .csv files for input
    :type csv_files: sequence of str

    :return: Nothing
    :rtype: None
    """
    self._clear_index(solr_url, False)
    conn = solr_connection('ati', solr_url)
    lc = LocalCKAN()
    if csv_files:
        for csv_file in csv_files:
            print csv_file + ':'
            firstpart, filename = os.path.split(csv_file)
            assert filename.endswith('.csv')
            resource_name = filename[:-4]
            chromo = get_chromo(resource_name)
            geno = get_geno(chromo['dataset_type'])
            assert geno.get('target_dataset') == TARGET_DATASET
            for org_id, records in csv_data_batch(csv_file, chromo, strict=strict):
                records = [
                    dict((k, safe_for_solr(v)) for k, v in row_dict.items())
                    for row_dict in records]
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print " {0:s} {1}".format(org_id, len(records))
                _update_records(records, org_detail, conn)
    else:
        for org_id in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org_id)
            for resource_name, records in data_batch(
                    org_detail['id'], lc, TARGET_DATASET):
                _update_records(records, org_detail, conn)
                count += len(records)
            print org_id, count
    print "commit"
    conn.commit()
def _rebuild(self, csv_files=None, solr_url=None):
    """
    Implement rebuild command

    :param csv_files: sequence of paths to .csv files for input
    :type csv_files: sequence of str

    :return: Nothing
    :rtype: None
    """
    self._clear_index(solr_url, False)
    conn = solr_connection('ati', solr_url)
    lc = LocalCKAN()
    if csv_files:
        for csv_file in csv_files:
            print csv_file + ':'
            firstpart, filename = os.path.split(csv_file)
            assert filename.endswith('.csv')
            resource_name = filename[:-4]
            chromo = get_chromo(resource_name)
            geno = get_geno(chromo['dataset_type'])
            assert geno.get('target_dataset') == TARGET_DATASET
            for org_id, records in csv_data_batch(csv_file, chromo):
                records = [
                    dict((k, safe_for_solr(v)) for k, v in row_dict.items())
                    for row_dict in records]
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print " {0:s} {1}".format(org_id, len(records))
                _update_records(records, org_detail, conn)
    else:
        for org_id in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org_id)
            for resource_name, records in data_batch(
                    org_detail['id'], lc, TARGET_DATASET):
                _update_records(records, org_detail, conn)
                count += len(records)
            print org_id, count
    print "commit"
    conn.commit()
def _rebuild(self, csv_files=None):
    """
    Implement rebuild command

    :param csv_files: sequence of paths to .csv files for input
    :type csv_files: sequence of str

    :return: Nothing
    :rtype: None
    """
    self._clear_index()
    conn = solr_connection('ati_summaries')
    lc = LocalCKAN()
    if csv_files:
        # count[csv_file][org_id] tracks how many records were indexed
        # from each file for each organization
        count = {}
        for csv_file in csv_files:
            print csv_file + ':'
            count[csv_file] = {}
            for org_recs in csv_data_batch(csv_file, TARGET_DATASET):
                org_id = org_recs.keys()[0]
                if org_id not in count[csv_file]:
                    count[csv_file][org_id] = 0
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                records = org_recs[org_id]
                _update_records(records, org_detail, conn)
                count[csv_file][org_id] += len(records)
            for k, v in count[csv_file].iteritems():
                print " {0:s} {1}".format(k, v)
        # report the total indexed per organization across all files
        for org_id in lc.action.organization_list():
            print org_id, sum((count[f].get(org_id, 0) for f in count))
    else:
        for org_id in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org_id)
            for records in data_batch(org_detail['id'], lc, TARGET_DATASET):
                _update_records(records, org_detail, conn)
                count += len(records)
            print org_id, count
def rebuild(command_name, csv_files=None):
    """
    Implement rebuild command

    :param csv_files: sequence of paths to .csv files for input
    :type csv_files: sequence of str

    :return: Nothing
    :rtype: None
    """
    clear_index(command_name)
    conn = solr_connection(command_name)
    lc = LocalCKAN()
    if csv_files:
        # index records from the given CSV dumps
        for csv_file in csv_files:
            print csv_file + ':'
            unmatched = None
            for resource_name, org_id, records in csv_data_batch(csv_file, command_name):
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print " {0:s} {1}".format(org_id, len(records))
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
    else:
        # index records from datasets stored in CKAN
        for org in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org)
            unmatched = None
            for resource_name, records in data_batch(org_detail['id'], lc, command_name):
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
                count += len(records)
            print org, count
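# A minimal sketch of how a module-level rebuild() entry point like the one
# above might be wired to a command line. This is an illustration only: the
# import path ckanext.canada.search_integration and the argument names are
# assumptions, not part of the original module.
import argparse

from ckanext.canada.search_integration import rebuild  # assumed import path


def main():
    parser = argparse.ArgumentParser(
        description='Rebuild the Solr index for a search command')
    parser.add_argument('command_name',
                        help='name of the search index/command to rebuild')
    parser.add_argument('csv_files', nargs='*',
                        help='optional .csv files to index instead of '
                             'reading datasets from the live CKAN instance')
    args = parser.parse_args()
    # when no CSV files are given, rebuild() falls back to iterating over
    # every organization in CKAN
    rebuild(args.command_name, csv_files=args.csv_files or None)


if __name__ == '__main__':
    main()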