Example #1
0
def add_filters_item(filters, es_url, es_in_index, es_out_index):
    """Copy items matching *filters* from one index to another.

    :param filters: filters applied when fetching items from the input index
    :param es_url: URL of the Elasticsearch holding both indexes
    :param es_in_index: name of the index to read items from
    :param es_out_index: name of the index to write items to
    :return: number of items uploaded to the output index
    """

    elastic_in = ElasticSearch(es_url, es_in_index)
    elastic_out = ElasticSearch(es_url, es_out_index)

    # Copy the filtered items from the input index to the output index,
    # keyed by the "uuid" field.
    total = elastic_out.bulk_upload_sync(fetch(elastic_in, filters), "uuid")

    # Report and return the count instead of silently dropping it
    # (consistent with export_items, which logs the same message).
    logging.info("Total items copied: %i", total)

    return total
Example #2
0
    # Parse command-line options (ArgumentParser built by get_params()).
    ARGS = get_params()

    # Verbose logging when --debug is set; plain INFO logging otherwise.
    if ARGS.debug:
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(message)s')
        logging.debug("Debug mode activated")
    else:
        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)s %(message)s')

    # NOTE(review): this message always prints ARGS.collection, even when
    # --project or all_collections selected the data instead — confirm
    # whether that is intended.
    logging.info("Importing items from %s to %s/%s", ARGS.collection,
                 ARGS.elastic_url, ARGS.index)

    elastic = ElasticSearch(ARGS.elastic_url, ARGS.index)

    # Pick the MongoDB fetcher matching whichever selector option was
    # provided: a single collection, a whole project, or every collection
    # on the host. Exactly one of them is required.
    if ARGS.collection:
        mongo_items = fetch_mongodb_collection(ARGS.collection,
                                               ARGS.mongo_host,
                                               ARGS.mongo_port)
    elif ARGS.project:
        mongo_items = fetch_mongodb_project(ARGS.project, ARGS.mongo_host,
                                            ARGS.mongo_port)
    elif ARGS.all_collections:
        mongo_items = fetch_mongodb_all(ARGS.mongo_host, ARGS.mongo_port)
    else:
        raise RuntimeError('Collection to be processed not provided')

    # Bulk load the fetched items into Elasticsearch, keyed by "id".
    if mongo_items:
        logging.info("Loading collections in Elasticsearch")
        elastic.bulk_upload_sync(mongo_items, "id")
Example #3
0
                        help="Get metrics for data source")

    return parser.parse_args()


if __name__ == '__main__':

    args = get_params()

    # Verbose logging when --debug is set; plain INFO logging otherwise.
    if args.debug:
        logging.basicConfig(level=logging.DEBUG,
                            format='[%(asctime)s] %(message)s')
        logging.debug("Debug mode activated")
    else:
        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)s %(message)s')
    # Silence noisy HTTP client libraries.
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logging.getLogger("requests").setLevel(logging.WARNING)

    data_source = args.data_source
    index = "grimoirelab_metrics"

    # Write metrics to the dedicated metrics instance when one is given,
    # otherwise store them alongside the measured data. Build the client
    # exactly once: the original constructed ElasticSearch(args.elastic_url)
    # unconditionally and then overwrote it, touching (and possibly
    # creating) the index on the wrong instance.
    metrics_url = args.elastic_metrics_url or args.elastic_url
    elastic = ElasticSearch(metrics_url, index)

    # Metrics are always computed against args.elastic_url; uploads are
    # keyed by the "id" field.
    elastic.bulk_upload_sync(fetch_metric(args.elastic_url, data_source), "id")
Example #4
0
def export_items(elastic_url,
                 in_index,
                 out_index,
                 elastic_url_out=None,
                 search_after=False,
                 search_after_value=None,
                 limit=None,
                 copy=False):
    """Export items from in_index to out_index using the correct mapping.

    :param elastic_url: URL of the Elasticsearch holding in_index
    :param in_index: name of the index to read items from
    :param out_index: name of the index to write items to
    :param elastic_url_out: URL of the Elasticsearch for out_index
        (defaults to elastic_url)
    :param search_after: use search_after pagination instead of scrolling
    :param search_after_value: [timestamp, uuid] cursor to resume a
        previous search_after export from
    :param limit: maximum number of items per fetch (DEFAULT_LIMIT if falsy)
    :param copy: reuse the input index mapping instead of detecting one
    :return: None; exits the process with status 1 if in_index is missing
    """

    if not limit:
        limit = DEFAULT_LIMIT

    # Normalize the resume cursor: the timestamp arrives as a string but
    # Elasticsearch expects an integer sort value.
    if search_after_value:
        search_after_value_timestamp = int(search_after_value[0])
        search_after_value_uuid = search_after_value[1]
        search_after_value = [
            search_after_value_timestamp, search_after_value_uuid
        ]

    logging.info("Exporting items from %s/%s to %s", elastic_url, in_index,
                 out_index)

    # Fail fast (and report the total) before starting the copy.
    count_res = requests.get('%s/%s/_count' % (elastic_url, in_index))
    try:
        count_res.raise_for_status()
    except requests.exceptions.HTTPError:
        if count_res.status_code == 404:
            logging.error("The index does not exist: %s", in_index)
        else:
            logging.error(count_res.text)
        sys.exit(1)

    logging.info("Total items to copy: %i", count_res.json()['count'])

    # Time to upload the items with the correct mapping
    elastic_in = ElasticSearch(elastic_url, in_index)
    if not copy:
        # Create the correct mapping for the data sources detected from in_index
        ds_mapping = find_mapping(elastic_url, in_index)
    else:
        logging.debug('Using the input index mapping')
        ds_mapping = extract_mapping(elastic_url, in_index)

    # The output index may live on a different Elasticsearch instance.
    elastic_out = ElasticSearch(elastic_url_out or elastic_url,
                                out_index,
                                mappings=ds_mapping)

    # Time to just copy from in_index to out_index, keyed by the uuid
    # field detected in the input index.
    uid_field = find_uuid(elastic_url, in_index)
    backend = find_perceval_backend(elastic_url, in_index)
    if search_after:
        total = elastic_out.bulk_upload_sync(
            fetch(elastic_in, backend, limit, search_after_value,
                  scroll=False), uid_field)
    else:
        total = elastic_out.bulk_upload_sync(fetch(elastic_in, backend, limit),
                                             uid_field)

    logging.info("Total items copied: %i", total)
Example #5
0
        for dependency in dependencies:
            eitem = enrich_item(dependency)
            eitem['project'] = project
            yield eitem


if __name__ == '__main__':

    args = get_params()

    # Both logging modes share the same message format; only the level
    # (and the extra debug-activation message) differ.
    log_format = '%(asctime)s %(message)s'
    if args.debug:
        logging.basicConfig(level=logging.DEBUG, format=log_format)
        logging.debug("Debug mode activated")
    else:
        logging.basicConfig(level=logging.INFO, format=log_format)

    logging.info("Importing items from %s to %s/%s", args.file,
                 args.elastic_url, args.index)

    # Read the dependencies from the input file, then bulk load them into
    # Elasticsearch keyed by "uuid" (skipped when nothing was fetched).
    items = fetch_dependencies(args.file, args.project)
    elastic = ElasticSearch(args.elastic_url, args.index)

    if items:
        logging.info("Loading dependencies in Elasticsearch ...")
        elastic.bulk_upload_sync(items, "uuid")
        logging.info("Import completed.")