Esempio n. 1
0
def test():
    """Fetch one sample record from the SPIRE OAI-PMH target and dump it.

    Manual smoke test: builds a ``Target`` describing the Sciences Po
    institutional repository, asks ``oaipmh_harvester.get_record`` for a
    single record and hands the result to ``dump_result``.

    Returns:
        None.
    """
    base_dir = os.path.join(os.getcwd(), "data")
    # Parenthesised single-argument form: same output under Python 2, and
    # valid syntax under Python 3 (the bare print statement is not).
    print("base_dir: " + base_dir)

    target_spire = Target()
    target_spire['identifier'] = 'spire'
    target_spire['title'] = 'Sciences Po Institutional Repository'
    target_spire['type'] = 'oaipmh'
    target_spire['url'] = 'http://spire.sciences-po.fr/dissemination/oaipmh2-publications.xml'
    target_spire['metadata_prefix'] = 'mods'

    # Identifier of the record actually requested below.
    test_identifier_bookPart = 'oai:spire.sciences-po.fr:2441/eo6779thqgm5r489makgoai85'

    # Sample values that were previously assigned to locals but never read;
    # kept here for reference (labels are the original variable suffixes):
    #   book:              oai:spire.sciences-po.fr:2441/dambferfb7dfprc9m26c8c8o3
    #   masterThesis:      oai:spire.sciences-po.fr:2441/5l6uh8ogmqildh09h6m8hj429
    #   doctoralThesis:    oai:spire.sciences-po.fr:2441/3fm4jv3k2s99lms9jb5i5asil
    #   professoralThesis: oai:spire.sciences-po.fr:2441/f4rshpf3v1umfa09lb0joe5g5
    #   date_from:         datetime(2012, 10, 1, 12, 30, 59, tzinfo=None)
    #   date_until:        datetime(2013, 4, 30, 17, 50, 1, tzinfo=None)
    #   set:               'SHS:ART'

    # Disabled alternative calls, left as in the original test. The second one
    # used an undefined name (test_identifier_01); it now points at the
    # identifier defined above. NOTE(review): confirm these harvester
    # functions exist under these exact names before enabling them.
    #result = oaipmh_harvester.identifiy(target_spire)
    #dump_result(result)

    #result = oaipmh_harvester.list_metadata_formats(target_spire, test_identifier_bookPart)
    #dump_result(result)

    #result = oaipmh_harvester.list_sets(target_spire)
    #dump_result(result)

    result = oaipmh_harvester.get_record(target_spire,
                                         identifier=test_identifier_bookPart)
    dump_result(result)
def test():
    """Request a single SPIRE record over OAI-PMH and dump the result.

    Manual smoke test. A ``Target`` is filled in with the Sciences Po
    repository settings, one record is requested through
    ``oaipmh_harvester.get_record`` and the result is passed to
    ``dump_result``.

    Returns:
        None.
    """
    base_dir = os.path.join(os.getcwd(), "data")
    # print(...) with one parenthesised argument emits the same text on
    # Python 2 and, unlike the print statement, also parses on Python 3.
    print("base_dir: " + base_dir)

    target_spire = Target()
    target_spire['identifier'] = 'spire'
    target_spire['title'] = 'Sciences Po Institutional Repository'
    target_spire['type'] = 'oaipmh'
    target_spire['url'] = 'http://spire.sciences-po.fr/dissemination/oaipmh2-publications.xml'
    target_spire['metadata_prefix'] = 'mods'

    # The only identifier this test uses.
    test_identifier_bookPart = 'oai:spire.sciences-po.fr:2441/eo6779thqgm5r489makgoai85'

    # Reference values formerly held in locals that nothing read (labels are
    # the original variable suffixes):
    #   book:              oai:spire.sciences-po.fr:2441/dambferfb7dfprc9m26c8c8o3
    #   masterThesis:      oai:spire.sciences-po.fr:2441/5l6uh8ogmqildh09h6m8hj429
    #   doctoralThesis:    oai:spire.sciences-po.fr:2441/3fm4jv3k2s99lms9jb5i5asil
    #   professoralThesis: oai:spire.sciences-po.fr:2441/f4rshpf3v1umfa09lb0joe5g5
    #   date_from:         datetime(2012, 10, 1, 12, 30, 59, tzinfo=None)
    #   date_until:        datetime(2013, 4, 30, 17, 50, 1, tzinfo=None)
    #   set:               'SHS:ART'

    # Alternative calls that were disabled in the original test. The
    # list_metadata_formats line referenced an undefined test_identifier_01;
    # it now uses the identifier defined above. NOTE(review): verify the
    # harvester function names before re-enabling any of these.
    #result = oaipmh_harvester.identifiy(target_spire)
    #dump_result(result)

    #result = oaipmh_harvester.list_metadata_formats(target_spire, test_identifier_bookPart)
    #dump_result(result)

    #result = oaipmh_harvester.list_sets(target_spire)
    #dump_result(result)

    result = oaipmh_harvester.get_record(target_spire, identifier=test_identifier_bookPart)
    dump_result(result)
Esempio n. 3
0
def harvest_by_ids(corpus, target, ids):
    """Harvest the records named by ``ids`` from ``target`` and import them.

    Each identifier is fetched with ``oaipmh_harvester.get_record``; the
    collected results are then handed in a single call to
    ``corpus_service.import_metajson_list``. Both phases are timed and
    reported through ``chrono.chrono_trace``.

    Args:
        corpus: Forwarded unchanged to ``corpus_service.import_metajson_list``.
        target: Forwarded unchanged to ``oaipmh_harvester.get_record``.
        ids: Iterable of record identifiers. It does not need to support
            ``len()``; the harvested results are counted instead.

    Returns:
        None.
    """
    # Pass ids as a logging argument so the message is only rendered when
    # the INFO level is actually enabled.
    logging.info("harvest_by_ids: %s", ids)
    date_begin = datetime.datetime.now()

    # harvest
    metajson_list = [oaipmh_harvester.get_record(target, identifier) for identifier in ids]
    date_harvest = datetime.datetime.now()
    # Exactly one result is collected per identifier, so this equals len(ids)
    # for sized inputs while also working for generators.
    chrono.chrono_trace("harvest spire and convert to metajson", date_begin, date_harvest, len(metajson_list))

    # import
    # NOTE(review): the meaning of the positional True / None arguments is
    # defined by import_metajson_list and is not visible from here.
    result_import = corpus_service.import_metajson_list(corpus, metajson_list, True, None)
    date_import = datetime.datetime.now()
    chrono.chrono_trace("import", date_harvest, date_import, len(result_import))
Esempio n. 4
0
def harvest_by_ids(corpus, target, ids):
    """Fetch every record listed in ``ids`` and import the batch into ``corpus``.

    Records are retrieved one by one via ``oaipmh_harvester.get_record`` and
    then imported together with ``corpus_service.import_metajson_list``. The
    duration of the harvest phase and of the import phase is reported with
    ``chrono.chrono_trace``.

    Args:
        corpus: Forwarded unchanged to ``corpus_service.import_metajson_list``.
        target: Forwarded unchanged to ``oaipmh_harvester.get_record``.
        ids: Iterable of record identifiers; any iterable is accepted because
            the count reported for the harvest phase is taken from the
            collected results rather than from ``len(ids)``.

    Returns:
        None.
    """
    # Lazy %-style argument: logging renders the message only if INFO is on.
    logging.info("harvest_by_ids: %s", ids)
    date_begin = datetime.datetime.now()

    # harvest
    metajson_list = [
        oaipmh_harvester.get_record(target, identifier) for identifier in ids
    ]
    date_harvest = datetime.datetime.now()
    # One entry per identifier was collected, so the list length is the
    # number of harvested ids (and works for iterables without len()).
    chrono.chrono_trace("harvest spire and convert to metajson", date_begin,
                        date_harvest, len(metajson_list))

    # import
    # NOTE(review): what the positional True / None arguments select is
    # decided by import_metajson_list; confirm against its definition.
    result_import = corpus_service.import_metajson_list(
        corpus, metajson_list, True, None)
    date_import = datetime.datetime.now()
    chrono.chrono_trace("import", date_harvest, date_import,
                        len(result_import))