def postProcessing(options):
    """
        Executes some curating operations over imported data: records whose link matches the
        mygoblet.org courses pattern and that are tagged as "Training Material" are deleted and
        inserted again tagged as "Event".
        * options {dict} specific configurations for initialization. May be None.
            ds_name {string} specific dataset/database to use with the DB manager

        Records whose link is not reported as existing by util.existURL are not inserted again.
    """

    init_logger()

    logger.info('> Executing ckan postprocessing... ')

    # Callers may forward an options object that is None; treat it as "no options".
    ds_name = None
    if options is not None and 'ds_name' in options:
        ds_name = options['ds_name']

    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        usertemp = config.get('AuthenticationSection', 'database.user')
        passwtemp = config.get('AuthenticationSection', 'database.password')
        # Assigned only after both lookups succeeded, so a partially read
        # configuration never yields a user without its password.
        user, passw = usertemp, passwtemp
    except Exception as e:
        # Best effort: without credentials we go on with user/passw set to None.
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(
        ds_name, user, passw)

    # We want to change all courses from mygoblet.org tagged as Training Materials
    ckan_conditions = [[
        'AND',
        [['EQ', 'link', 'http://www.mygoblet.org//training-portal/courses/*'],
         ['EQ', 'resource_type', ["Training Material"]]]
    ]]

    results = dbManager.get_data_by_conditions(ckan_conditions)

    numSuccess = 0
    if results is None:
        # Nothing could be read back, so deleting now would only lose data.
        logger.info('No records could be retrieved, nothing has been deleted')
    else:
        # Delete all of them, and then insert them again modified.
        # We will have to implement an update operation in AbstractManager.
        dbManager.delete_data_by_conditions(ckan_conditions)
        for result in results:
            link = result.get("link")
            if not util.existURL(link):
                # Links that do not exist are not inserted again.
                continue
            success = dbManager.insert_data({
                "title": result.get("title"),
                "description": result.get("description"),
                "field": result.get("field"),
                "source": result.get("source"),
                # Now they are Events, not Training Materials!
                "resource_type": ["Event"],
                "insertion_date": result.get("insertion_date"),
                "created": result.get("created"),
                "audience": result.get("audience"),
                "link": link
            })
            if success:
                numSuccess = numSuccess + 1

    logger.info('Changed ' + str(numSuccess) +
                ' mygoblet.org records tagged as Training Materials to Events')
    logger.info('< Finished ckan postprocessing')
def main_options(options):
    """
        Executes the main functionality of this script: it extracts JSON data from each Training Material found
        and inserts its main data into the DB.
        * options {dict} specific configurations for initialization. May be None.
            ds_name {string} specific dataset/database to use with the DB manager
            delete_all_old_data {boolean} specifies if we should delete all previous ckanData in our DataBase
            registriesFromTime {datetime} time from registries will be obtained
            updateRegistries {boolean} if we want to get new registries or not

        When updateRegistries is true, postProcessing(options) is run after the insertions.
    """

    init_logger()

    ds_name = None
    delete_all_old_data = False
    registriesFromTime = None
    updateRegistries = True

    if options is not None:
        logger.info('>> Starting ckanData importing process... params: ')
        if 'ds_name' in options:
            ds_name = options['ds_name']
            # str() keeps the log line from failing when ds_name is not a string (e.g. None).
            logger.info('ds_name=' + str(ds_name))
        if 'delete_all_old_data' in options:
            delete_all_old_data = options['delete_all_old_data']
            logger.info('delete_all_old_data=' + str(delete_all_old_data))
        if 'registriesFromTime' in options:
            registriesFromTime = options['registriesFromTime']
            logger.info('registriesFromTime=' + str(registriesFromTime))
        if 'updateRegistries' in options:
            updateRegistries = options['updateRegistries']
            logger.info('updateRegistries=' + str(updateRegistries))
    else:
        logger.info('>> Starting ckanData importing process...')

    # The material names are fetched before the DB manager is created and before any deletion.
    materials_names = None
    if updateRegistries:
        materials_names = get_materials_names()

    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        usertemp = config.get('AuthenticationSection', 'database.user')
        passwtemp = config.get('AuthenticationSection', 'database.password')
        # Assigned only after both lookups succeeded, so a partially read
        # configuration never yields a user without its password.
        user, passw = usertemp, passwtemp
    except Exception as e:
        # Best effort: without credentials we go on with user/passw set to None.
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(
        ds_name, user, passw)

    if delete_all_old_data:
        ckan_conditions = [['EQ', 'source', get_source_field()]]
        previous_count = dbManager.count_data_by_conditions(ckan_conditions)
        dbManager.delete_data_by_conditions(ckan_conditions)
        new_count = dbManager.count_data_by_conditions(ckan_conditions)
        if previous_count is not None and new_count is not None:
            logger.info('Deleted ' + str(previous_count - new_count) +
                        ' registries')

    if materials_names is not None:
        numSuccess = 0
        for material_name in materials_names:
            json_data = get_json_from_material_name(material_name)
            if json_data is None:
                continue
            # If we have registriesFromTime, each registry must be more recent than it.
            if registriesFromTime is not None and not isDataMoreRecentThan(
                    json_data, registriesFromTime):
                continue
            success = dbManager.insert_data({
                "title": get_title(json_data),
                "description": get_notes(json_data),
                "field": get_field(json_data),
                "source": get_source_field(),
                "resource_type": get_resource_type_field(),
                "insertion_date": get_insertion_date_field(),
                "created": get_created(json_data),
                "audience": get_audience(json_data),
                "link": get_link(json_data)
            })
            if success:
                numSuccess = numSuccess + 1

        logger.info('Inserted ' + str(numSuccess) + ' new registries')

    if updateRegistries:
        postProcessing(options)

    logger.info('<< Finished ckanData importing process.')
# Example no. 3
# 0
def main_options(options):
    """
        Executes the main functionality of this script: it extracts JSON data from each record found on Elixir's registry
        and inserts its main data into the DB.
        * options {dict} specific configurations for initialization. May be None.
            ds_name {string} specific dataset/database to use with the DB manager
            delete_all_old_data {boolean} specifies if we should delete all previous Elixir registry data in our DataBase
            updateRegistries {boolean} if we want to get new registries or not

        A 'registriesFromTime' option is not read by this function.

        In this script we will insert these fields into each registry:
            "title" value of get_title(record).
            "description" value of get_description(record).
            "link" value of get_link(record).
            "field" value of get_field(record).
            "source" value of get_source_field().
            "resource_type" value of get_resource_type_field(record).
            "insertion_date" value of get_insertion_date_field().

    """

    init_logger()

    ds_name = None
    delete_all_old_data = False
    updateRegistries = True

    if options is not None:
        logger.info(
            '>> Starting Elixir registry importing process... params: ')
        if 'ds_name' in options:
            ds_name = options['ds_name']
            # str() keeps the log line from failing when ds_name is not a string (e.g. None).
            logger.info('ds_name=' + str(ds_name))
        if 'delete_all_old_data' in options:
            delete_all_old_data = options['delete_all_old_data']
            logger.info('delete_all_old_data=' + str(delete_all_old_data))
        if 'updateRegistries' in options:
            updateRegistries = options['updateRegistries']
            logger.info('updateRegistries=' + str(updateRegistries))
    else:
        logger.info('>> Starting Elixir registry importing process...')

    # The records are fetched before the DB manager is created and before any deletion.
    records = None
    if updateRegistries:
        records = get_records()

    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        usertemp = config.get('AuthenticationSection', 'database.user')
        passwtemp = config.get('AuthenticationSection', 'database.password')
        # Assigned only after both lookups succeeded, so a partially read
        # configuration never yields a user without its password.
        user, passw = usertemp, passwtemp
    except Exception as e:
        # Best effort: without credentials we go on with user/passw set to None.
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(
        ds_name, user, passw)

    if delete_all_old_data:
        registry_conditions = [['EQ', 'source', get_source_field()]]
        previous_count = dbManager.count_data_by_conditions(
            registry_conditions)
        dbManager.delete_data_by_conditions(registry_conditions)
        new_count = dbManager.count_data_by_conditions(registry_conditions)
        if previous_count is not None and new_count is not None:
            logger.info('Deleted ' + str(previous_count - new_count) +
                        ' registries')

    if records is not None:
        numSuccess = 0
        for record in records:
            # NOTE: the link is not checked with util.existURL before inserting.
            success = dbManager.insert_data({
                "title": get_title(record),
                "description": get_description(record),
                "link": get_link(record),
                "field": get_field(record),
                "source": get_source_field(),
                "resource_type": get_resource_type_field(record),
                "insertion_date": get_insertion_date_field()
            })
            if success:
                numSuccess = numSuccess + 1

        logger.info('Inserted ' + str(numSuccess) + ' new registries')

    logger.info('<< Finished Elixir registry importing process...')
# Example no. 4
# 0
 def setUp(self):
     """
         Creates a DBFactory and asks it for the 'SOLR' manager with the
         'test_core' schema, keeping both on the instance.
     """
     factory = DBFactory()
     self.dbFactory = factory
     manager = factory.get_specific_db_manager_and_schema('SOLR', 'test_core')
     self.dbManager = manager
def main_options(options):
    """
        Executes the main functionality of this script: it extracts information from iAnn events data and insert some of them
        into the DB
        * options {dict} specific configurations for initialization. May be None.
            ds_name: specific dataset/database to use with the DB manager
            delete_all_old_data {boolean} specifies if we should delete all previous iAnn data in our DataBase
            registriesFromTime {datetime} time from registries will be obtained
            updateRegistries {boolean} if we want to get new registries or not


        In this script we will insert these fields into each registry:
            "title" value of get_title(result).
            "start" value of get_start(result).
            "end" value of get_end(result).
            "city" value of get_city(result).
            "country" value of get_country(result).
            "field" value of get_field(result).
            "provider" value of get_provider(result).
            "link" value of get_link(result).
            "source" value of get_source_field().
            "resource_type" value of get_resource_type_field().
            "insertion_date" value of get_insertion_date_field().
            "created" value of get_creation_date_field(result).

        Only events whose link is reported as existing by util.existURL are inserted.

        See more eg: http://iann.pro/iann-web-services
    """
    # IannDataLocking.lock()
    init_logger()

    ds_name = None
    delete_all_old_data = False
    registriesFromTime = None
    updateRegistries = True

    if options is not None:
        logger.info('>> Starting iann importing process... params: ')
        if 'ds_name' in options:
            ds_name = options['ds_name']
            # str() keeps the log line from failing when ds_name is not a string (e.g. None).
            logger.info('ds_name=' + str(ds_name))
        if 'delete_all_old_data' in options:
            delete_all_old_data = options['delete_all_old_data']
            logger.info('delete_all_old_data=' + str(delete_all_old_data))
        if 'registriesFromTime' in options:
            registriesFromTime = options['registriesFromTime']
            logger.info('registriesFromTime=' + str(registriesFromTime))
        if 'updateRegistries' in options:
            updateRegistries = options['updateRegistries']
            logger.info('updateRegistries=' + str(updateRegistries))
    else:
        logger.info('>> Starting iann importing process...')

    # The events are fetched before the DB manager is created and before any deletion.
    iann_data = None
    if updateRegistries:
        iann_data = get_iann_data(registriesFromTime)

    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        usertemp = config.get('AuthenticationSection', 'database.user')
        passwtemp = config.get('AuthenticationSection', 'database.password')
        # Assigned only after both lookups succeeded, so a partially read
        # configuration never yields a user without its password.
        user, passw = usertemp, passwtemp
    except Exception as e:
        # Best effort: without credentials we go on with user/passw set to None.
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(
        ds_name, user, passw)

    if delete_all_old_data:
        iann_conditions = [['EQ', 'source', get_source_field()]]
        previous_count = dbManager.count_data_by_conditions(iann_conditions)
        dbManager.delete_data_by_conditions(iann_conditions)
        new_count = dbManager.count_data_by_conditions(iann_conditions)
        if previous_count is not None and new_count is not None:
            logger.info('Deleted ' + str(previous_count - new_count) +
                        ' registries')

    if iann_data is not None:
        numSuccess = 0
        for result in iann_data:
            if result is None:
                continue
            # The link of the current event (the loop variable is 'result') is
            # resolved once and reused for the existence check and the insertion.
            link = get_link(result)
            if not util.existURL(link):
                continue
            success = dbManager.insert_data({
                "title": get_title(result),
                "start": get_start(result),
                "end": get_end(result),
                "city": get_city(result),
                "country": get_country(result),
                "field": get_field(result),
                "provider": get_provider(result),
                "link": link,
                "source": get_source_field(),
                "resource_type": get_resource_type_field(),
                "insertion_date": get_insertion_date_field(),
                "created": get_creation_date_field(result)
            })
            if success:
                numSuccess = numSuccess + 1

        logger.info('Inserted ' + str(numSuccess) + ' new registries')

    logger.info('<< Finished iann importing process.')