Example 1
def po_from_csv(file, save_to_db=False):
    """
    Takes a CSV file as input and returns its content as a list. Saves it to the DB if save_to_db is True.
    :param file: str PO file
    :param save_to_db: boolean
    :return: list | PO data
    """
    po_data = parse_file(file)

    if not save_to_db:
        return po_data

    query = "INSERT INTO po_scheduler.po_items (item_id, po_id, quantity) VALUES {values}"
    values = []

    if not {'po_id', 'item_id', 'quantity'}.issubset(po_data[0].keys()):
        return []

    for data in po_data:
        values.append("({}, {}, {})".format(data['item_id'], data['po_id'],
                                            data['quantity']))

    if not values:
        return []

    write_to_db(query.format(values=", ".join(values)))
    return po_data
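A brief usage sketch; the CSV path is hypothetical, and parse_file is assumed to return a list of dicts keyed by the header row (item_id, po_id, quantity):

po_rows = po_from_csv('po_items.csv')                   # parse only
po_rows = po_from_csv('po_items.csv', save_to_db=True)  # parse and insert into po_scheduler.po_items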
Example 2
def run_sentiment_analysis(with_graphs=False):
    conn = db_conn(db_config())
    sql = 'SELECT * FROM reviews'
    df = db_table(conn, sql)
    df['lang'] = df.apply(lambda x: detect_language(x['comments']), axis=1)
    df_english = df.loc[df['lang'] == 'english']
    df_scores = get_sentiment_scores(df_english)

    if with_graphs:
        plot_score_histograms(df_scores['positive'],
                              score_type='Positive',
                              filename='pos_sentiment.png')
        plot_score_histograms(df_scores['neutral'],
                              score_type='Neutral',
                              filename='neu_sentiment.png')
        plot_score_histograms(df_scores['negative'],
                              score_type='Negative',
                              filename='neg_sentiment.png')
        plot_score_histograms(df_scores['compound'],
                              score_type='Compound',
                              filename='compound_sentiment.png')

    df_avg = sentiment_by_listing(df_scores)
    dtypes = {'listing_id': INTEGER,
              'compound': FLOAT,
              'positive': FLOAT,
              'neutral': FLOAT,
              'negative': FLOAT}

    write_to_db(conn, df_avg, name='listings_sentiment', dtypes=dtypes)
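A possible entry point, assuming the module is run directly; with_graphs only toggles the four histogram PNGs, while the per-listing averages are written to listings_sentiment either way:

if __name__ == '__main__':
    run_sentiment_analysis(with_graphs=True)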
Example 3
def save_inbound_to_db(inbounds):
    """
    Saves our scheduled POs to DB
    :param inbounds: list of inbound schedules
    :return: None
    """
    query = """INSERT INTO po_scheduler.item_inbound (
    dock_id, 
    item_id, 
    po_id, 
    quantity, 
    slot_start_date, 
    slot_end_date) VALUES {values} ON DUPLICATE KEY UPDATE quantity=quantity;"""

    values = []

    for inbound in inbounds:
        values.append("({}, {}, {}, {}, '{}', '{}')".format(
            inbound['dock_id'], inbound['item_id'], inbound['po_id'],
            inbound['quantity'], inbound['slot_start_date'],
            inbound['slot_end_date']))

    if not values:
        return []

    write_to_db(query.format(values=", ".join(values)))
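A hedged example of the inbound schedule shape this function expects; all values are illustrative:

save_inbound_to_db([
    {'dock_id': 1, 'item_id': 101, 'po_id': 55, 'quantity': 30,
     'slot_start_date': '2020-01-06 08:00:00',
     'slot_end_date': '2020-01-06 10:00:00'},
])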
Example 4
def docks_from_csv_to_db(file):
    """
    Takes a CSV file path or FileStorage obj and returns a list of all docks. Saves the docks to the DB.
    :param file: str or FileStorage
    :return: list of all docks
    """

    dock_data = parse_file(file)
    query = "INSERT INTO po_scheduler.dock_slots (dock_id, slot_start_date, slot_end_date, capacity) VALUES {values};"
    values = []

    if not {'dock_id', 'slot_start_dt', 'slot_end_dt', 'capacity'}.issubset(dock_data[0].keys()):
        return []

    for data in dock_data:
        values.append("({}, '{}', '{}', {})".format(
            data['dock_id'], data['slot_start_dt'].replace("T", " "),
            data['slot_end_dt'].replace("T", " "), data['capacity']))

    if not values:
        return []

    write_to_db(query.format(values=", ".join(values)))
    return dock_data
Example 5
def delete_suite(suite_id: int) -> None:
    '''delete a suite from the database'''
    query = """delete from
        suites
    where
        suiteid = %s
    """
    write_to_db(query, (suite_id,))
Example 6
def update_suite(suite_id: int, name: str, description: str) -> None:
    '''update a suite name and description'''
    query = """
    update
        suites
    set
        suitename = %s,
        description = %s
    where
        suiteid = %s"""
    write_to_db(query, (name, description, suite_id))
Example 7
def update_suite_settings(suite_id: int, browser: str, width: int,
                          height: int) -> None:
    '''update the configuration settings for a suite'''
    query = """
    update
        suite_config
    set
        browser = %s,
        width = %s,
        height = %s
    where
        suiteid = %s
    """
    write_to_db(query, (browser, width, height, suite_id))
Example 8
def write_scores(conn, entities, scores):
    # Get data types for output table
    dtypes = {
        'listing_id': INTEGER,
        'date': DATE,
        'available': VARCHAR(length=10),
        'actual_price': FLOAT,
        'predicted_price': FLOAT
    }

    # Concatenate entity and score columns
    entities['predicted_price'] = scores

    # Write data frame to DB
    name = 'calendar_predicted'
    write_to_db(conn, entities, name, dtypes, if_exists='append')
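A minimal sketch of the inputs write_scores expects, assuming conn comes from db_conn(db_config()) as in the other examples; the listing IDs, dates and prices are illustrative:

import pandas as pd

entities = pd.DataFrame({
    'listing_id': [101, 102],
    'date': ['2019-07-01', '2019-07-02'],
    'available': ['t', 'f'],
    'actual_price': [120.0, 95.0],
})
write_scores(conn, entities, scores=[118.4, 97.2])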
Example 9
def add_config(suite_id: int, browser: str, width: int, height: int) -> int:
    '''add configuration to a suite'''
    query = """insert into
        suite_config (suiteid, browser, width, height)
    values
        (%s, %s, %s, %s)"""
    return write_to_db(query, (suite_id, browser, width, height))
Example 10
def add_schedule(suite_id: int, active: bool, period: int) -> int:
    if not active:
        period = 0
    query = """insert into
        scheduledSuite (suiteid, active, period, nextrun)
    values
        (%s, %s, %s, now() + interval %s minute)"""
    return write_to_db(query, (suite_id, active, period, period))
Example 11
def update_name(testid: int, name: str) -> int:
    '''changes the name of a test in the database'''
    query = '''update
        tests
    set testname = %s
    where
        testid = %s
    '''
    return write_to_db(query, (name, testid))
Example 12
def delete_test(test_id: int) -> int:
    '''recursively delete an entire test'''
    query = '''
    delete from
        tests
    where
        testid = %s
    '''
    return write_to_db(query, (test_id,))
Example 13
def add_test(suite_id: int, name: str, active: bool, period: int) -> int:
    '''adds a test to the database'''
    query = """insert into
        tests(testname, suiteid)
    values
        (%s, %s)"""
    test_id = write_to_db(query, (name, suite_id))
    add_schedule(suite_id, test_id, active, period)
    return test_id
Example 14
def update_schedule_config(suite_id: int, active: bool, period: int) -> tuple:
    '''updates the configurable fields of a scheduled suite'''
    query = '''
    update
        scheduledSuite
    set
        active=%s, period=%s
    where
        suiteid = %s
    '''
    return (True, write_to_db(query, (active, period, suite_id)))
Example 15
def add_suite(name: str, description: str, browser: str, width: int,
              height: int, active: bool, period: int) -> int:
    '''add a suite to the database'''
    query = """insert into
        suites (suitename, description)
    values
        (%s, %s)"""
    suite_id = write_to_db(query, (name, description))
    add_config(suite_id, browser, width, height)
    add_schedule(suite_id, active, period)
    return suite_id
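A short usage sketch; add_suite inserts the suite row and then wires up its configuration and schedule through add_config and add_schedule (all values are illustrative):

suite_id = add_suite(name='smoke tests', description='nightly smoke suite',
                     browser='chrome', width=1366, height=768,
                     active=True, period=60)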
Example 16
def delete_steps_from_test(testid: int) -> int:
    '''
    deletes all steps from a test
    (used when editing steps, because it's easier than updating each step)
    '''
    query = '''delete from
        steps
    where
        testid = %s
    '''
    return write_to_db(query, (testid,))
Example 17
def add_run(testid: int, start: str) -> int:
    '''
    adds a run to the database
    used when a test starts
    '''
    query = '''
    insert into
        runs(testid, start)
    values
        (%s, %s);
    '''
    return write_to_db(query, (testid, start))
Example 18
def update_run(runid: int, end: str, passed: bool,
               screenshot_passed: bool) -> int:
    '''
    updates a run in the database
    used after a test completes
    '''
    query = '''
    update runs
    set
        end=%s, passed=%s, screenshot_passed=%s
    where
        runid=%s
    '''
    return write_to_db(query, (end, passed, screenshot_passed, runid))
Example 19
def _add_dl_item(req, title, thumbUrl, duration):
    '''
    add a DLItem to the database and return its id (primary key)
    '''
    entry = DLItem(utc_time=datetime.utcnow(),
                   url=req.url,
                   title=title,
                   thumb_url=thumbUrl,
                   duration=duration,
                   status=Status.PENDING,
                   progress=0)
    saved = utils.write_to_db(req.app, req.db, [entry])
    if saved is None or len(saved) == 0:
        return
    return saved[0]
Example 20
def add_run_step(runid: int, action: str, args: str, passed: bool,
                 take_screenshot: bool, screenshot_percent: float,
                 screenshot_passed: bool, screenshot_name: str) -> int:
    '''
    adds a step to a run
    '''
    query = '''
    insert into
        run_step(runid, action, args, passed, take_screenshot,
                 screenshot_percentage, screenshot_passed, screenshot_name)
    values
        (%s, %s, %s, %s, %s, %s, %s, %s);
    '''
    return write_to_db(
        query, (runid, action, args, passed, take_screenshot,
                screenshot_percent, screenshot_passed, screenshot_name))
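Taken together with add_run and update_run above, a hedged sketch of a single run's lifecycle; the test id, timestamps and step data are illustrative:

run_id = add_run(testid=7, start='2020-05-01 12:00:00')
add_run_step(run_id, action='click', args='#login', passed=True,
             take_screenshot=False, screenshot_percent=0.0,
             screenshot_passed=True, screenshot_name='')
update_run(run_id, end='2020-05-01 12:00:30', passed=True,
           screenshot_passed=True)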
Example 21
def schedule_next_suite(schedule_id: int) -> int:
    '''
    schedules a test to run either at the last scheduled time + the period,
    or at now + the period
    '''
    query = '''
    update
        scheduledSuite
    set
        nextrun = if(
            nextrun + interval period minute > now(),
            nextrun + interval period minute,
            now() + interval period minute
        )
    where
        id = %s
    '''
    return write_to_db(query, (schedule_id,))
Example 22
# merge all df's
spy_df = pd.concat(data, axis=0)
spy_df.set_index(spy_df.date, inplace=True)
spy_df.drop(columns=['date'], inplace=True)
spy_df.sort_index(inplace=True)
spy_df.index = spy_df.index.tz_localize('utc').tz_convert(
    'America/Chicago').strftime(
        "%Y-%m-%d %H:%M:%S")  # convert timezones to the VIX timezone
spy_df['date'] = pd.to_datetime(spy_df.index)
spy_df = spy_df[[
    'date', 'open', 'high', 'low', 'close', 'volume', 'average', 'barCount'
]]

# merge old and new SPY data
spy_new = spy_df['2018-12-31 19:59:00':]
spy_merged = pd.concat([spy, spy_new], axis=0)
spy_merged.head()
spy_merged.tail()

# remove duplicates
print(spy_merged.shape)
spy_merged.drop_duplicates(keep='first', inplace=True)
print(spy_merged.shape)

# add to database
write_to_db(spy_merged, "odvjet12_market_data_usa", 'SPY')

# disconnect interactive brokers
ib.disconnect()
Example 23
        df = util.df(bars)
        max_date = df['date'].min()
        df.to_csv(save_path_prefix + max_date.strftime('%Y-%m-%d') + '.csv',
                  index=False,
                  sep=';')
    except TypeError as te:
        print(te)
        break

# clean scraped tables
files = glob.glob(save_path_prefix + '*')
market_data = [pd.read_csv(f, sep=';', parse_dates=['date']) for f in files]
market_data = pd.concat(market_data, axis=0)
market_data = market_data.merge(pd.Series(con_list[i]).rename('ticker'),
                                how='left',
                                left_index=True,
                                right_index=True)
market_data.drop_duplicates(inplace=True)
market_data.sort_index(inplace=True)

# save final table to db and hdf5 file
write_to_db(market_data, "odvjet12_market_data_usa", con_list[i])
store_path = 'D:/market_data/usa/' + con_list[i] + '.h5'
market_data.to_hdf(store_path, key=con_list[i])

# delete csv files
shutil.rmtree(con_list[i])

# disconnect interactive brokers
ib.disconnect()
Example 24
def cluster_GA(nPool, eleNames, eleNums, eleRadii, generations, calc, filename,
               log_file, CXPB=0.5, singleTypeCluster=False):
    '''
    DEAP implementation of the GIGA genetic algorithm for nanoclusters
    '''
    best_db = ase.db.connect("{}.db".format(filename))

    # Creating types
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))
    creator.create("Individual", list, fitness=creator.FitnessMax)

    # Registration of the evolutionary tools in the toolbox
    toolbox = base.Toolbox()
    toolbox.register("poolfill", fillPool, eleNames, eleNums, eleRadii, calc)
    toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.poolfill, 1)
    toolbox.register("evaluate1", fitness_func1)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    # Registering mutation and crossover operators
    toolbox.register("mate", mate)
    toolbox.register("mutate_homotop", homotop)
    toolbox.register("mutate_rattle", rattle_mut)
    toolbox.register("mutate_rotate", rotate_mut)
    toolbox.register("mutate_twist", twist)
    toolbox.register("mutate_tunnel", tunnel)
    toolbox.register("mutate_partialinv", partialInversion)
    toolbox.register("mutate_skin", skin)
    toolbox.register("mutate_changecore", changeCore)
        
    #Registering selection operator 
    toolbox.register("select", tools.selTournament)


    population = toolbox.population(n=nPool)

    # Creating a list of cluster Atoms objects from the population
    pop_list = []
    for individual in population:
        pop_list.append(individual[0])


    # Dask parallelization
    def calculate(atoms):
        atoms_min = minimize(atoms, calc)
        return atoms_min

    # Distribute and run the calculations
    clus_bag = db.from_sequence(pop_list, partition_size=1)
    clus_bag_computed = clus_bag.map(calculate)
    lst_clus_min = clus_bag_computed.compute()

    for i, p in enumerate(population):
        p[0] = lst_clus_min[i]

    #Fitnesses (or Energy) values of the initial random population
    fitnesses = list(map(toolbox.evaluate1, population)) 
        
    with open(log_file, 'a+') as fh:
        fh.write('Energies (fitnesses) of the initial pool' '\n')
        for value in fitnesses:
            fh.write("{} \n".format(value[0]))
	
    for ind, fit in zip(population, fitnesses):
        ind.fitness.values = fit

    #Evolution of the Genetic Algorithm
    with open(log_file, 'a+') as fh:
        fh.write('\n')
        fh.write('Starting Evolution' '\n')
	
    g = 0
    init_pop_db = ase.db.connect("init_pop_{}.db".format(filename))
    for cl in population:
        write_to_db(init_pop_db, cl[0])

    bi = []
    while g < generations:
        mutType = None
        muttype_list = []
        g = g + 1
        with open(log_file, 'a+') as fh:
            fh.write('{} {} \n'.format('Generation', g))

        cm_pop = []
        if random.random() < CXPB:  #Crossover Operation
            mutType = 'crossover'
            with open(log_file, 'a+') as fh:
                fh.write('{} {} \n'.format('mutType', mutType))
                                
            # Crossover operation step.
            # The child clusters will be checked for bonding and similarity
            # with the other child clusters.
            loop_count = 0
            while  loop_count != 200:
                clusters = toolbox.select(population,2,1)
                muttype_list.append(mutType)
                parent1 = copy.deepcopy(clusters[0])
                parent2 = copy.deepcopy(clusters[1])
                fit1 = clusters[0].fitness.values
                f1, = fit1
                fit2 = clusters[1].fitness.values
                f2, = fit2
                toolbox.mate(parent1[0],parent2[0],f1,f2)

                diff_list = []
                if checkBonded(parent1[0]) == True:
                    if loop_count == 0:
                        cm_pop.append(parent1)
                    else:
                        for c,cluster in enumerate(cm_pop):
                            diff = checkSimilar(cluster[0],parent1[0])
                            diff_list.append(diff)

                        if all(diff_list) == True:
                            cm_pop.append(parent1)
                loop_count = loop_count+1
                if len(cm_pop) == nPool:
                    break

        else:   #Mutation Operation
            mutType = 'mutations'
            with open(log_file, 'a+') as fh:
                fh.write('{} {} \n'.format('mutType', mutType))
                                
            # Mutation operation step
            # Each cluster in the population will undergo a randomly chosen mutation
            # Mutated new clusters will be checked for bonding and similarity with other new clusters
            for m,mut in enumerate(population):
                mutant = copy.deepcopy(mut)
                if singleTypeCluster:
                    mutType = random.choice(['rattle','rotate','twist','partialinv','tunnel','skin','changecore'])
                else:
                    mutType = random.choice(['rattle','rotate','homotop','twist','partialinv','tunnel','skin','changecore'])

                muttype_list.append(mutType)

                if mutType == 'homotop':
                    mutant[0] = toolbox.mutate_homotop(mutant[0])
                if mutType == 'rattle':
                    mutant[0] = toolbox.mutate_rattle(mutant[0])
                if mutType == 'rotate':
                    mutant[0] = toolbox.mutate_rotate(mutant[0])
                if mutType == 'twist':
                    mutant[0] = toolbox.mutate_twist(mutant[0])
                if mutType == 'tunnel':
                    mutant[0] = toolbox.mutate_tunnel(mutant[0])
                if mutType == 'partialinv':
                    mutant[0] = toolbox.mutate_partialinv(mutant[0])
                if mutType == 'skin':
                    mutant[0] = toolbox.mutate_skin(mutant[0])
                if mutType == 'changecore':
                    mutant[0] = toolbox.mutate_changecore(mutant[0])
					
                diff_list = []
                if checkBonded(mutant[0]) == True:
                    for c,cluster in enumerate(cm_pop):
                        diff = checkSimilar(cluster[0],mutant[0])
                        diff_list.append(diff)

                    if all(diff_list) == True:
                        cm_pop.append(mutant)
		                	
            with open(log_file, 'a+') as fh:
                fh.write('{} {} \n'.format('mutType_list', muttype_list))
				

        mut_new_lst = []
        for mut in cm_pop:
            mut_new_lst.append(mut[0])
                        
        #DASK Parallel relaxation of the crossover child/mutatted clusters
        mut_bag = db.from_sequence(mut_new_lst, partition_size = 1)
        mut_bag_computed = mut_bag.map(calculate)
        mut_new_lst_min = mut_bag_computed.compute()
			
        for o,mm in enumerate(cm_pop):
            mm[0] = mut_new_lst_min[o]

        fitnesses_mut = list(map(toolbox.evaluate1, cm_pop)) 

        for ind, fit in zip(cm_pop, fitnesses_mut):
            ind.fitness.values = fit

        new_population = copy.deepcopy(population)
                        
        #Relaxed clusters will be checked for bonded and similarity with the other
        #clusters in the population. If dissimilar, they will be added to the new population.
        for cm1,cmut1 in enumerate(cm_pop):
            new_diff_list = []
            if checkBonded(cmut1[0]) == True:
                for c2,cluster1 in enumerate(population):
                    diff = checkSimilar(cluster1[0],cmut1[0])
                    new_diff_list.append(diff)
                if all(new_diff_list) == True:
                    new_population.append(cmut1)
                else:
                    pass
			
        with open(log_file, 'a+') as fh:
            fh.write('{} {} \n'.format('Total number of clusters in the new population', len(new_population)))
        
        fitnesses_pool = list(map(toolbox.evaluate1, new_population)) 
        
        with open(log_file, 'a+') as fh:
            fh.write('Energies (fitnesses) of the present pool' '\n')
            for value in fitnesses_pool:
                fh.write("{} \n".format(value[0]))
        
        #Selecting the lowest energy npool clusters from the new_population
        best_n_clus = tools.selWorst(new_population,nPool)
        population = best_n_clus

        best_clus = tools.selWorst(population,1)[0]
        with open(log_file, 'a+') as fh:
            fh.write('{} {} \n'.format('Lowest energy cluster is', best_clus))
            fh.write('{} {} \n'.format('Lowest energy is',best_clus.fitness.values[0]))
            fh.write('\n')
        bi.append(best_clus[0])
        write_to_db(best_db,best_clus[0])

    final_pop_db = ase.db.connect("final_pop_{}.db".format(filename))
    for clus in population:
        write_to_db(final_pop_db,clus[0])

    return bi,best_clus[0]
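A hedged usage sketch; the element names, counts, radii, the EMT calculator and the file names are illustrative assumptions, not values from the source:

from ase.calculators.emt import EMT

best_history, best_cluster = cluster_GA(
    nPool=10, eleNames=['Cu', 'Ag'], eleNums=[6, 7], eleRadii=[1.28, 1.44],
    generations=20, calc=EMT(), filename='cu_ag_13', log_file='ga.log',
    CXPB=0.5, singleTypeCluster=False)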
Example 25
fin_statement_info_all = pd.concat(fin_statement_info, axis=0)
coamap_all = pd.concat(coamap, axis=0)
coamap_all = coamap_all.drop_duplicates(ignore_index=True)
coamap_all.set_axis(coamap_all.columns.str.replace(r'@|#', ''),
                    axis='columns',
                    inplace=True)
# annual financial statements
annual_fin_statements_all = pd.concat(annual_fin_statements, axis=0)
annual_fin_statements_all = janitor.clean_names(annual_fin_statements_all)
annual_fin_statements_all.set_axis(
    annual_fin_statements_all.columns.str.replace(r'@|#', ''),
    axis='columns',
    inplace=True)
# interim financial statements
interim_fin_statements_all = pd.concat(interim_fin_statements, axis=0)
interim_fin_statements_all = janitor.clean_names(interim_fin_statements_all)
interim_fin_statements_all.set_axis(
    interim_fin_statements_all.columns.str.replace(r'@|#', ''),
    axis='columns',
    inplace=True)

# add to database
write_to_db(fin_statement_info_all, "odvjet12_stocks", "fundaments_usa_info")
write_to_db(coamap_all, "odvjet12_stocks", "fundaments_usa_coamap")
write_to_db(annual_fin_statements_all, "odvjet12_stocks",
            "fundaments_usa_annual")
write_to_db(interim_fin_statements_all, "odvjet12_stocks",
            "fundaments_usa_interim")

ib.disconnect()
Example 26
        'nyse': [
            f'http://www.eoddata.com/stocklist/NYSE/{l}.htm'
            for l in string.ascii_uppercase
        ]
    }

    # loop to obtain data from every url
    tickers = [get_tickers(value, key) for key, value in urls.items()]
    tickers_df = pd.concat(tickers, axis=0)
    tickers_df = tickers_df[[
        'code', 'name', 'high', 'low', 'close', 'volume', 'exchange'
    ]]
    tickers_df['date'] = datetime.date.today()

    # add to database
    write_to_db(tickers_df, "odvjet12_stocks", "stocks_usa")

    # connection
    ib = IB()
    ib.connect('127.0.0.1', 7497, clientId=1)

    # get tickers THIS STEP CAN BE REMOVED
    tickers = query_to_db('SELECT DISTINCT code FROM stocks_usa',
                          'odvjet12_stocks')
    tickers = tickers.code.to_list()

    # get contract details
    contracts = [
        ib.reqContractDetails(Stock(tick, 'SMART', 'USD')) for tick in tickers
    ]
    contracts_short = [con for con in contracts if len(con) > 0]
Example 27
                                 attrs={'id': 'dnevna_trgovanja'},
                                 thousands=".",
                                 decimal=',')[0]
        if len(tblByDate.index) == 0:
            continue

        # clean table
        tblByDate = janitor.clean_names(tblByDate)
        tblByDate = tblByDate.rename(columns={
            'ticker': 'symbol',
            'change_%': 'change'
        })
        tblByDate['change'] = tblByDate['change'].str.extract('(.\d+,\d+)')
        tblByDate['change'] = tblByDate['change'].str.replace(',', '.')
        tblByDate.loc[:, 'close':'turnover'] = tblByDate.loc[:, 'close':'turnover']\
            .applymap(lambda x: pd.to_numeric(x, errors='coerce'))
        scrapDate = pd.DataFrame({'date': datei}, index=range(len(tblByDate)))
        tblByDate = pd.concat([
            scrapDate.reset_index(drop=True),
            tblByDate.reset_index(drop=True)
        ],
                              axis=1)
        tblByDate['exchange'] = 'zse'
        zseList.append(tblByDate)

    # rbind all tables
    zseTrade = pd.concat(zseList, axis=0, sort=False)

    # add to database
    write_to_db(zseTrade, "odvjet12_stocks", "trade_zse")
Example 28
        #         index=False, sep=';')
    except TypeError as te:
        print(te)
        break

# merge all df's
vix_df = pd.concat(data, axis=0)
vix_df.set_index(vix_df.date, inplace=True)
vix_df.drop(columns=['date'], inplace=True)
vix_df.sort_index(inplace=True)
vix_df.index = vix_df.index.tz_localize('utc').tz_convert(
    'America/Chicago').strftime(
        "%Y-%m-%d %H:%M:%S")  # convert timezones to the VIX timezone
vix_df['date'] = pd.to_datetime(vix_df.index)
vix_df = vix_df[['date', 'open', 'high', 'low', 'close', 'volume', 'average', 'barCount']]
vix_df.head()
vix_df.tail()

# merge vix dbs
vix_new = vix_df['2019-03-30 00:00:00':]
vix_merged = pd.concat([vix, vix_new], axis=0)
vix_merged.head()
vix_merged.tail()

# remove duplicates
vix_merged.drop_duplicates(keep='first', inplace=True)

# add to database
write_to_db(vix_merged, "odvjet12_market_data_usa", 'VIX')

# disconnect interactive brokers
ib.disconnect()
Example 29
def main(project_mapping):
	"""
	project_mapping is a two-level nested dict.
	The first level's keys are the iLab project IDs and each one maps to a dict
	Each 'second level' dict has a bucket and client_emails key, which give the bucket name gs://<bucket name>
	and a list of emails, respectively
	"""

	logging.info('In cloud tracking module')
	logging.info('Project mapping: %s' % project_mapping)

	# get some configuration parameters
	params = utils.parse_config_file('TRACKING')
	
	# need to cleanup some of the parameters:
	try:
		params['retention_period'] = int(params['retention_period'])
		logging.info('Params read from config: %s' % params)
		logging.info('Retention period set to %s days' % params['retention_period'])
	except:
		logging.error('Could not interpret one of the configuration parameters correctly.  Check that the intended data types match those in the config file')
		sys.exit(1)		

	# set the expiration date
	target_date = datetime.datetime.now() + datetime.timedelta(days=params['retention_period'])

	# read the database file
	this_dir = os.path.dirname(os.path.realpath(__file__))
	params['data_retention_db'] = os.path.join(this_dir, params['data_retention_db'])
	if os.path.isfile(params['data_retention_db']):
		logging.info('About to parse database file' )
		project_database = utils.load_database(params['data_retention_db'], params)
		logging.info('Parsed from the database: %s' % project_database)
	else:
		logging.error('Could not find a database file at %s' % params['data_retention_db'])
		raise MissingPrimaryDatabaseException('The primary database file is missing.  Fix that.')

	for project_id, info_dict in project_mapping.items():

		logging.info('Checking project with iLab ID: %s' % project_id)
		# perhaps we have an ongoing project- then a bucket for this iLab ID probably already exists
		if project_id in project_database:

			logging.info('project with ID %s was already in our database. Plan to update the deletion date' % project_id)
			# get the info we have about this in our database
			db_entry = project_database[project_id]
			
			# ensure the bucket names match.  If they do, simply update the retention target date and the email contacts
			if info_dict['bucket'] == db_entry['bucket']:
				logging.info('The delivery buckets matched, as expected')
				logging.info('Changing deletion date from %s to %s' % (db_entry['target_date'].strftime(params['date_format']), target_date.strftime(params['date_format'])))
				db_entry['target_date'] = target_date
				existing_emails = set(db_entry['client_emails'])
				new_emails = set(info_dict['client_emails'])
				total_emails = existing_emails.union(new_emails)
				logging.info('Original emails were: %s' % existing_emails)
				logging.info('New emails were: %s' % new_emails)
				logging.info('The union of those sets of emails is %s' % total_emails)
				db_entry['client_emails'] = list(total_emails)
			else:
				# somehow the same iLab project was placed into a different bucket.  Shouldn't happen, so raise an exception.  We
				# retain a 1-to-1 mapping between iLab IDs and bucket IDs.  Maybe later we change this behavior based on a particular use-case
				logging.error('The bucket name did not match that of a prior project with the same iLab ID.  This should not happen.')
				logging.error('The bucket found in the database was: %s' % db_entry['bucket'])
				logging.error('The bucket that was just uploaded the demux to was: %s' % info_dict['bucket'])
				raise MultipleBucketsForSameProjectException('The iLab IDs were the same, but the bucket was somehow different.  Someone needs to check this!')
				#TODO- send a message for someone to fix it.

		else:
			logging.info('A new project will be added to the database.')
			logging.info('update info dict.  Before %s, then add %s' % (info_dict, target_date))
			info_dict.update({'target_date': target_date})
			project_database[project_id] = info_dict

	logging.info('Project database: %s' % project_database)
				
	# now write to the database file:
	utils.write_to_db(project_database, params)
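A minimal sketch of the project_mapping structure described in the docstring; the iLab IDs, bucket names and e-mail addresses are illustrative:

project_mapping = {
    '1234': {
        'bucket': 'cccb-delivery-1234',
        'client_emails': ['pi@example.org', 'postdoc@example.org'],
    },
    '5678': {
        'bucket': 'cccb-delivery-5678',
        'client_emails': ['lab.manager@example.org'],
    },
}
main(project_mapping)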
Example 30
    dateCols = ['ex_dividend_date', 'payment_date', 'record_date', 'nav_date']
    zseStocks[numericCols] = zseStocks[numericCols].apply(pd.to_numeric,
                                                          errors='coerce',
                                                          axis=1)
    zseStocks[dateCols] = zseStocks[dateCols].apply(pd.to_datetime,
                                                    errors='coerce',
                                                    axis=1)
    zseStocks = zseStocks.drop(columns=deleteCols)
    zseStocks[strToNumeric] = zseStocks[strToNumeric].applymap(str_to_float)
    zseStocks['discount'] = zseStocks['discount'].divide(100)
    zseStocks['exchange'] = "zse"
    zseStocks['currency'] = "HRK"
    zseStocks['market_capitalization'] = zseStocks['shares'].mul(
        zseStocks['last'])
    zseStocks = zseStocks.rename(
        columns={
            'ticker': 'symbol',
            'issuer': "company_name",
            'nacerev': 'sector_code',
            '52wk_high': 'price_week52_high',
            '52wk_low': 'price_week52_low',
            'security_type': 'type',
            'shares': "number_shares_outstanding",
            'cash_dividend': 'dividend',
            'payment_date': 'dividend_payment_date',
            'record_date': 'dividend_record_date'
        })

    # add to database
    write_to_db(zseStocks, "odvjet12_stocks", "zse_stocks")