Example #1
0
    def _rebuild(self, csv_files=None):
        """
        Implement rebuild command

        :param csv_files: sequence of paths to .csv files for input
        :type csv_files: sequence of str

        :return: Nothing
        :rtype: None
        """
        self._clear_index()

        conn = solr_connection("ati")
        lc = LocalCKAN()
        if csv_files:
            for csv_file in csv_files:
                print csv_file + ":"
                for org_id, records in csv_data_batch(csv_file, TARGET_DATASET):
                    try:
                        org_detail = lc.action.organization_show(id=org_id)
                    except NotFound:
                        continue
                    print "    {0:s} {1}".format(org_id, len(records))
                    _update_records(records, org_detail, conn)
        else:
            for org_id in lc.action.organization_list():
                count = 0
                org_detail = lc.action.organization_show(id=org_id)
                for records in data_batch(org_detail["id"], lc, TARGET_DATASET):
                    _update_records(records, org_detail, conn)
                    count += len(records)
                print org_id, count
Example #2
0
    def _rebuild(self, csv_file=None):
        """
        Implement rebuild command

        :param csv_file: path to .csv file for input
        :type csv_file: str

        :return: Nothing
        :rtype: None
        """
        self._clear_index()

        conn = solr_connection(self.command_name)
        lc = LocalCKAN()
        if csv_file:
            count = {}
            for org_id, records in csv_data_batch(csv_file, self.command_name):
                if org_id not in count:
                    count[org_id] = 0
                org_detail = lc.action.organization_show(id=org_id)
                _update_records(records, org_detail, conn, self.command_name)
                count[org_id] += len(records)
            for org_id in lc.action.organization_list():
                print org_id, count.get(org_id, 0)
        else:
            for org in lc.action.organization_list():
                count = 0
                org_detail = lc.action.organization_show(id=org)
                for records in data_batch(org_detail["id"], lc, self.command_name):
                    _update_records(records, org_detail, conn, self.command_name)
                    count += len(records)
                print org, count
Example #3
0
    def _rebuild(self, csv_file=None):
        """
        Implement rebuild command

        :param csv_file: path to .csv file for input
        :type csv_file: str

        :return: Nothing
        :rtype: None
        """
        conn = solr_connection('proactive_disclosure')
        lc = LocalCKAN()
        if csv_file:
            count = {}
            for org_recs in csv_data_batch(csv_file, TARGET_DATASET):
                org_id = org_recs.keys()[0]
                if org_id not in count:
                    count[org_id] = 0
                org_detail = lc.action.organization_show(id=org_id)
                records = org_recs[org_id]
                _update_records(records, org_detail, conn)
                count[org_id] += len(records)
            for org_id in lc.action.organization_list():
                print org_id, count.get(org_id, 0)
        else:
            for org in lc.action.organization_list():
                count = 0
                org_detail = lc.action.organization_show(id=org)
                for records in data_batch(org_detail['id'], lc, TARGET_DATASET):
                    _update_records(records, org_detail, conn)
                    count += len(records)
                print org, count
Example #4
0
def rebuild(command_name, csv_files=None, solr_url=None, strict=True):
    """
    Implement rebuild command

    :param csv_file: path to .csv file for input
    :type csv_file: str

    :return: Nothing
    :rtype: None
    """
    clear_index(command_name, solr_url, False)

    conn = solr_connection(command_name, solr_url)
    lc = LocalCKAN()
    if csv_files:
        for csv_file in csv_files:
            print csv_file + ':'
            prev_org = None
            unmatched = None
            firstpart, filename = os.path.split(csv_file)
            assert filename.endswith('.csv')
            resource_name = filename[:-4]

            chromo = get_chromo(resource_name)
            geno = get_geno(chromo['dataset_type'])

            for org_id, records in csv_data_batch(csv_file,
                                                  chromo,
                                                  strict=strict):
                records = [
                    dict((k, safe_for_solr(v)) for k, v in row_dict.items())
                    for row_dict in records
                ]
                if org_id != prev_org:
                    unmatched = None
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print "    {0:s} {1}".format(org_id, len(records))
                unmatched = _update_records(records, org_detail, conn,
                                            resource_name, unmatched)
    else:
        for org in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org)
            unmatched = None
            for resource_name, records in data_batch(org_detail['id'], lc,
                                                     command_name):
                unmatched = _update_records(records, org_detail, conn,
                                            resource_name, unmatched)
                count += len(records)
            print org, count

    print "commit"
    conn.commit()
Example #5
0
def rebuild(command_name, csv_files=None, solr_url=None):
    """
    Implement rebuild command

    :param csv_file: path to .csv file for input
    :type csv_file: str

    :return: Nothing
    :rtype: None
    """
    clear_index(command_name, solr_url, False)

    conn = solr_connection(command_name, solr_url)
    lc = LocalCKAN()
    if csv_files:
        for csv_file in csv_files:
            print csv_file + ':'
            prev_org = None
            unmatched = None
            firstpart, filename = os.path.split(csv_file)
            assert filename.endswith('.csv')
            resource_name = filename[:-4]

            chromo = get_chromo(resource_name)
            geno = get_geno(chromo['dataset_type'])

            for org_id, records in csv_data_batch(csv_file, chromo):
                records = [dict((k, safe_for_solr(v)) for k, v in
                            row_dict.items()) for row_dict in records]
                if org_id != prev_org:
                    unmatched = None
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print "    {0:s} {1}".format(org_id, len(records))
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
    else:
        for org in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org)
            unmatched = None
            for resource_name, records in data_batch(org_detail['id'], lc, command_name):
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
                count += len(records)
            print org, count

    print "commit"
    conn.commit()
Example #6
0
    def _rebuild(self, csv_files=None, solr_url=None, strict=True):
        """
        Implement rebuild command

        :param csv_files: sequence of paths to .csv files for input
        :type csv_files: sequence of str

        :return: Nothing
        :rtype: None
        """
        self._clear_index(solr_url, False)

        conn = solr_connection('ati', solr_url)
        lc = LocalCKAN()
        if csv_files:
            for csv_file in csv_files:
                print csv_file + ':'
                firstpart, filename = os.path.split(csv_file)
                assert filename.endswith('.csv')
                resource_name = filename[:-4]

                chromo = get_chromo(resource_name)
                geno = get_geno(chromo['dataset_type'])
                assert geno.get('target_dataset') == TARGET_DATASET

                for org_id, records in csv_data_batch(csv_file,
                                                      chromo,
                                                      strict=strict):
                    records = [
                        dict((k, safe_for_solr(v))
                             for k, v in row_dict.items())
                        for row_dict in records
                    ]
                    try:
                        org_detail = lc.action.organization_show(id=org_id)
                    except NotFound:
                        continue
                    print "    {0:s} {1}".format(org_id, len(records))
                    _update_records(records, org_detail, conn)
        else:
            for org_id in lc.action.organization_list():
                count = 0
                org_detail = lc.action.organization_show(id=org_id)
                for resource_name, records in data_batch(
                        org_detail['id'], lc, TARGET_DATASET):
                    _update_records(records, org_detail, conn)
                    count += len(records)
                print org_id, count

        print "commit"
        conn.commit()
Example #7
0
    def _rebuild(self, csv_files=None, solr_url=None):
        """
        Implement rebuild command

        :param csv_files: sequence of paths to .csv files for input
        :type csv_files: sequence of str

        :return: Nothing
        :rtype: None
        """
        self._clear_index(solr_url, False)

        conn = solr_connection('ati', solr_url)
        lc = LocalCKAN()
        if csv_files:
            for csv_file in csv_files:
                print csv_file + ':'
                firstpart, filename = os.path.split(csv_file)
                assert filename.endswith('.csv')
                resource_name = filename[:-4]

                chromo = get_chromo(resource_name)
                geno = get_geno(chromo['dataset_type'])
                assert geno.get('target_dataset') == TARGET_DATASET

                for org_id, records in csv_data_batch(csv_file, chromo):
                    records = [dict((k, safe_for_solr(v)) for k, v in
                            row_dict.items()) for row_dict in records]
                    try:
                        org_detail = lc.action.organization_show(id=org_id)
                    except NotFound:
                        continue
                    print "    {0:s} {1}".format(org_id, len(records))
                    _update_records(records, org_detail, conn)
        else:
            for org_id in lc.action.organization_list():
                count = 0
                org_detail = lc.action.organization_show(id=org_id)
                for resource_name, records in data_batch(org_detail['id'], lc, TARGET_DATASET):
                    _update_records(records, org_detail, conn)
                    count += len(records)
                print org_id, count

        print "commit"
        conn.commit()
Example #8
0
    def _rebuild(self, csv_files=None):
        """
        Implement rebuild command

        :param csv_files: sequence of paths to .csv files for input
        :type csv_files: sequence of str

        :return: Nothing
        :rtype: None
        """
        self._clear_index()

        conn = solr_connection('ati_summaries')
        lc = LocalCKAN()
        if csv_files:
            count = {}
            for csv_file in csv_files:
                print csv_file + ':'
                count[csv_file] = {}
                for org_recs in csv_data_batch(csv_file, TARGET_DATASET):
                    org_id = org_recs.keys()[0]
                    if org_id not in count[csv_file]:
                        count[csv_file][org_id] = 0
                    try:
                        org_detail = lc.action.organization_show(id=org_id)
                    except NotFound:
                        continue
                    records = org_recs[org_id]
                    _update_records(records, org_detail, conn)
                    count[csv_file][org_id] += len(records)
                for k, v in count[csv_file].iteritems():
                    print "    {0:s} {1}".format(k, v)
            for org_id in lc.action.organization_list():
                print org_id, sum((count[f].get(org_id, 0) for f in count))
        else:
            for org_id in lc.action.organization_list():
                count = 0
                org_detail = lc.action.organization_show(id=org_id)
                for records in data_batch(org_detail['id'], lc, TARGET_DATASET):
                    _update_records(records, org_detail, conn)
                    count += len(records)
                print org_id, count
Example #9
0
def rebuild(command_name, csv_files=None):
    """
    Implement rebuild command

    :param csv_file: path to .csv file for input
    :type csv_file: str

    :return: Nothing
    :rtype: None
    """
    clear_index(command_name)

    conn = solr_connection(command_name)
    lc = LocalCKAN()
    if csv_files:
        for csv_file in csv_files:
            print csv_file + ':'
            unmatched = None
            for resource_name, org_id, records in csv_data_batch(csv_file, command_name):
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print "    {0:s} {1}".format(org_id, len(records))
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
    else:
        for org in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org)
            unmatched = None
            for resource_name, records in data_batch(org_detail['id'], lc, command_name):
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
                count += len(records)
            print org, count