Example #1
def main():
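    # Portal activity job: looks up newly created datasets for the configured
    # time interval, emails a notification and records which dataset ids were
    # notified. FileUtils, ConfigUtils, PostgresStuff and MonitorPortal are
    # project-specific helpers assumed to be imported elsewhere in this module.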
    curr_full_path = FileUtils.getCurrentDirFullPath()
    config_fn = 'portal_activity_job_config.yaml'
    cI = ConfigUtils(curr_full_path + "/configs/", config_fn)
    configItems = cI.getConfigs()
    configItems['config_dir'] = curr_full_path + "/" + configItems['config_dir']
    configItems['curr_full_path'] = curr_full_path
    db_ini = configItems['config_dir'] + configItems['database_config']
    conn_alq, meta_alq = PostgresStuff.connect_alq(db_ini)
    conn = PostgresStuff.connect(db_ini)
    db_tbl = configItems['activity_table']
    first_run = getFirstRun(conn)
    if first_run == 0:
        print("****First RUN! No new created datasets in the past " +
              configItems['activity']['create']['time_interval'] + "*****")
        exit(0)
    insert_created = updateCreatedDatasets(
        conn, configItems['activity']['create']['time_interval'])
    #print insert_created
    created_datasets = MonitorPortal.generateActivityReport(
        conn_alq, configItems, 'create')
    if not created_datasets:
        print("**** No new created datasets in the past " +
              configItems['activity']['create']['time_interval'] + "*****")
        exit(0)
    datasetid_notified = MonitorPortal.generateEmail(conn_alq, configItems,
                                                     'create',
                                                     created_datasets)
    updted_notified_cnt = MonitorPortal.updateNotifiedDatasetIds(
        conn, configItems, 'create', datasetid_notified)
    print("******Notfied that " + str(updted_notified_cnt) +
          " datasets were created****")
    print("******Updated " + str(updted_notified_cnt) +
          " rows in the created_dataset table****")
Example #2
def main():
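    # Field-profiling job: loads the master data dictionary from Socrata,
    # removes profiles for deleted fields, rebuilds the field profiles and
    # sends a job-status email. The Socrata* and ProfileFields helpers are
    # project-specific and assumed to be imported elsewhere in this module.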
    fieldConfigFile, config_inputdir, jobType = parse_opts()
    cI = ConfigUtils(config_inputdir, fieldConfigFile)
    configItems = cI.getConfigs()
    configItems['dataset_name'] = jobType
    lg = pyLogger(configItems)
    logger = lg.setConfig()
    logger.info("****************JOB START******************")
    sc = SocrataClient(config_inputdir, configItems, logger)
    client = sc.connectToSocrata()
    clientItems = sc.connectToSocrataConfigItems()
    scrud = SocrataCRUD(client, clientItems, configItems, logger)
    sQobj = SocrataQueries(clientItems, configItems, logger)

    mmdd_fbf = configItems['dd']['master_dd']['fbf']
    field_profiles_fbf = configItems['dd']['field_profiles']['fbf']
    base_url = configItems['baseUrl']

    field_type_fbf = configItems['dd']['field_type']['fbf']

    load_mm_dd = ProfileFields.getBaseDatasetJson(sQobj, configItems, mmdd_fbf)
    #print load_mm_dd
    #load_mm_dd  = True
    current_field_profiles = ProfileFields.getCurrentFieldProfiles(
        sQobj, base_url, field_profiles_fbf)

    if load_mm_dd:
        master_dfList = ProfileFields.get_dataset_as_dfList(
            configItems['pickle_data_dir'], configItems['mm_dd_json_fn'],
            base_url)
        ProfileFields.removeDeletedFields(scrud, field_profiles_fbf,
                                          master_dfList,
                                          current_field_profiles)
        current_field_profiles = ProfileFields.getCurrentFieldProfiles(
            sQobj, base_url, field_profiles_fbf)

        dataset_info = ProfileFields.buildInsertFieldProfiles(
            sQobj, scrud, configItems, master_dfList, current_field_profiles)
        print(dataset_info)
        dsse = JobStatusEmailerComposer(configItems, logger, jobType)
        if dataset_info['DatasetRecordsCnt'] > 1:
            dsse.sendJobStatusEmail([dataset_info])
        else:
            dataset_info = {
                'Socrata Dataset Name': configItems['dataset_name'],
                'SrcRecordsCnt': 0,
                'DatasetRecordsCnt': 0,
                'fourXFour': "Nothing to Insert"
            }
            dataset_info['isLoaded'] = 'success'
            dsse.sendJobStatusEmail([dataset_info])
Example #3
def runWebTask(configItems):
  # Triggers the Algolia sync webtask URL and saves the JSON response to the
  # log directory, retrying up to three times on URLError. Assumes Python 2's
  # urllib2 and the project-specific ConfigUtils helper are imported elsewhere.
  cItemsWebtask = ConfigUtils(configItems['inputConfigDir'], configItems['webtask_config_fn'])
  configItemsWebTasks = cItemsWebtask.getConfigs()
  url = configItemsWebTasks['webtask_url']
  attempts = 0
  while attempts < 3:
    try:
      response = urllib2.urlopen(url, timeout=15)
      content = response.read()
      if content:
        f = open(configItems['log_dir'] + "algolia-sync-results.json", 'w')
        f.write(content)
        f.close()
        return True
    except urllib2.URLError as e:
      attempts += 1
      print(type(e))
  return False
Example #4
def main():
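    # Portal activity job: rotates (archives) the portal activity data and
    # emails a notification built from the configured message template.
    # FileUtils, ConfigUtils, PostgresStuff and Emailer are project-specific
    # helpers assumed to be imported elsewhere in this module.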
    curr_full_path = FileUtils.getCurrentDirFullPath()
    config_fn = 'portal_activity_job_config.yaml'
    cI = ConfigUtils(curr_full_path + "/configs/", config_fn)
    configItems = cI.getConfigs()
    configItems['config_dir'] = curr_full_path + "/" + configItems['config_dir']
    configItems['curr_full_path'] = curr_full_path
    db_ini = configItems['config_dir'] + configItems['database_config']
    conn_alq, meta_alq = PostgresStuff.connect_alq(db_ini)
    conn = PostgresStuff.connect(db_ini)
    db_tbl = configItems['activity_table']
    rotate_items = rotateActivityData(conn)
    activity = "rotate_portal_activity"
    subject_line = configItems['activity'][activity]['email_msg']['subject_line']
    msg_body = (configItems['email_msg_template']['header'] +
                configItems['activity'][activity]['email_msg']['msg'])
    msg_body = msg_body + configItems['email_msg_template']['footer']
    em = Emailer(configItems)
    em.sendEmails(subject_line, msg_body.encode('utf-8').strip())
Example #5
def main():
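    # Portal activity job: builds a stale/delayed dataset digest and emails it
    # via MonitorPortal. FileUtils, ConfigUtils, PostgresStuff and
    # MonitorPortal are project-specific helpers assumed to be imported
    # elsewhere in this module.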
    curr_full_path = FileUtils.getCurrentDirFullPath()
    config_fn = 'portal_activity_job_config.yaml'
    cI = ConfigUtils(curr_full_path + "/configs/", config_fn)
    configItems = cI.getConfigs()
    configItems['config_dir'] = curr_full_path + "/" + configItems['config_dir']
    configItems['curr_full_path'] = curr_full_path
    db_ini = configItems['config_dir'] + configItems['database_config']
    conn_alq, meta_alq = PostgresStuff.connect_alq(db_ini)
    conn = PostgresStuff.connect(db_ini)
    db_tbl = configItems['activity_table']
    digest_items = digestStaleDelayedDatasets(conn_alq, configItems,
                                              'stale_delayed_digest')
    if not digest_items:
        print("**** No digest items " +
              configItems['activity']['update']['time_interval'] + "*****")
        exit(0)
    datasetid_notified = MonitorPortal.generateEmail(conn_alq, configItems,
                                                     'stale_delayed_digest',
                                                     digest_items)
Example #6
def main():
    # Portal activity job: flags stale or delayed datasets for the configured
    # time interval, emails a report and records which dataset ids were
    # notified. FileUtils, ConfigUtils, PostgresStuff and MonitorPortal are
    # project-specific helpers assumed to be imported elsewhere in this module.
    curr_full_path = FileUtils.getCurrentDirFullPath()
    config_fn = 'portal_activity_job_config.yaml'
    cI = ConfigUtils(curr_full_path + "/configs/", config_fn)
    configItems = cI.getConfigs()
    configItems['config_dir'] = curr_full_path + "/" + configItems['config_dir']
    configItems['curr_full_path'] = curr_full_path
    db_ini = configItems['config_dir'] + configItems['database_config']
    conn_alq, meta_alq = PostgresStuff.connect_alq(db_ini)
    conn = PostgresStuff.connect(db_ini)
    db_tbl = configItems['activity_table']
    insert_late_updated = updateStaleDelayedDatasets(
        conn, configItems['activity']['update']['time_interval'])
    print(insert_late_updated)
    stale_late_datasets = MonitorPortal.generateActivityReport(
        conn_alq, configItems, 'update')
    print(stale_late_datasets)
    if not stale_late_datasets:
        print("**** No changes for stale or delayed datasets " +
              configItems['activity']['update']['time_interval'] + "*****")
        exit(0)
    datasetid_notified = MonitorPortal.generateEmail(conn_alq, configItems,
                                                     'update',
                                                     stale_late_datasets)
    updted_notified_cnt = MonitorPortal.updateNotifiedDatasetIds(
        conn, configItems, 'update', datasetid_notified)
    print("******Notified that " + str(updted_notified_cnt) +
          " datasets are late or stale****")
    print("******Updated " + str(updted_notified_cnt) +
          " rows in the late_updated_dataset table****")
Example #7
def main():
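    # Dataset-profiling job: refreshes per-dataset profiles in Socrata,
    # removes profiles for deleted datasets, triggers the Algolia sync webtask
    # and sends a job-status email. The Socrata*, ProfileDatasets and WebTasks
    # helpers are project-specific and assumed to be imported elsewhere.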
    fieldConfigFile, config_inputdir, jobType, hourly = parse_opts()
    cI = ConfigUtils(config_inputdir, fieldConfigFile)
    configItems = cI.getConfigs()
    configItems['dataset_name'] = jobType
    lg = pyLogger(configItems)
    logger = lg.setConfig()
    logger.info("****************JOB START******************")
    sc = SocrataClient(config_inputdir, configItems, logger)
    client = sc.connectToSocrata()
    clientItems = sc.connectToSocrataConfigItems()
    scrud = SocrataCRUD(client, clientItems, configItems, logger)
    sQobj = SocrataQueries(clientItems, configItems, logger)

    mmdd_fbf = configItems['dd']['master_dd']['fbf']
    ds_profiles_fbf = configItems['dd']['ds_profiles']['fbf']
    base_url = configItems['baseUrl']
    field_type_fbf = configItems['dd']['field_type']['fbf']
    asset_inventory_fbf = configItems['dd']['asset_inventory']['fbf']

    ds_profiles = ProfileDatasets.getCurrentDatasetProfiles(
        sQobj, base_url, ds_profiles_fbf)
    update_counter = 0
    updated_datasets = []

    #if int(hourly) == 1:
    #  print "****hourly update****"
    for datasetid, last_updt in ds_profiles.items():
        mm_profiles_to_updt = ProfileDatasets.getViewsLastUpdatedAt(
            datasetid, last_updt, clientItems)
        #print datasetid
        #print mm_profiles_to_updt
        if 'cols' in mm_profiles_to_updt:
            dataset_info_mm = {
                'Socrata Dataset Name': configItems['dataset_name'],
                'SrcRecordsCnt': 0,
                'DatasetRecordsCnt': 0,
                'fourXFour': mmdd_fbf,
                'row_id': 'columnid'
            }
            dataset_info_mm['DatasetRecordsCnt'] = 0
            dataset_info_mm['SrcRecordsCnt'] = len(mm_profiles_to_updt['cols'])
            #print mm_profiles_to_updt.keys()
            print "***************"
            print "Updating " + mm_profiles_to_updt['dataset_name']
            print "*************"
            print "**************"

            dataset_info_mm = scrud.postDataToSocrata(
                dataset_info_mm, mm_profiles_to_updt['cols'])
            update_counter += 1
            updated_datasets.append(mm_profiles_to_updt['dataset_name'])

    datasets = ProfileDatasets.getBaseDatasets(sQobj, base_url, mmdd_fbf)
    asset_inventory_dict = ProfileDatasets.getAssetInventoryInfo(
        sQobj, base_url, asset_inventory_fbf)
    #delete datasets from the profile that no longer exist
    ProfileDatasets.removeDeletedDatasets(scrud, ds_profiles_fbf,
                                          asset_inventory_dict, ds_profiles,
                                          datasets)

    ds_profiles = ProfileDatasets.getCurrentDatasetProfiles(
        sQobj, base_url, ds_profiles_fbf)

    field_types = ProfileDatasets.getFieldTypes(sQobj, base_url,
                                                field_type_fbf)

    dataset_info = ProfileDatasets.buildInsertDatasetProfiles(
        sQobj, scrud, configItems, datasets, ds_profiles, field_types,
        asset_inventory_dict)

    # triggers a webtask to update the Algolia index
    updated_algolia = False
    if WebTasks.runWebTask(configItems):
        updated_algolia = True
    print("******Ran Algolia update script**** " + str(updated_algolia))
    print(dataset_info)
    dsse = JobStatusEmailerComposer(configItems, logger, jobType)
    if dataset_info['DatasetRecordsCnt'] > 1 and updated_algolia:
        print "update complete"
        dsse.sendJobStatusEmail([dataset_info])
    else:
        dataset_info = {
            'Socrata Dataset Name': configItems['dataset_name'],
            'SrcRecordsCnt': 0,
            'DatasetRecordsCnt': 0,
            'fourXFour': "Nothing to Insert"
        }
        dataset_info['isLoaded'] = 'success'
        dsse.sendJobStatusEmail([dataset_info])