def run(self):
    """Hill-climb from self.initval, recording (value, fitness) pairs until
    MetaUtils.localmax detects a plateau, then report and exit."""
    currval = self.initval
    MetaUtils.runtool(self.cmd + [self.switch, str(currval)])
    currfit = MetaUtils.fitness("%s_%s/log" % (self.logd, currval))
    self.k_values.append((currval, currfit))
    while True:
        sys.stderr.write("\n==> current value: %s, fitness: %s\n" \
            % (str(currval), str(currfit)))
        neighval = self.neighbour(currval)
        sys.stderr.write("neighval: %s" % (str(neighval)))
        MetaUtils.runtool(self.cmd + [self.switch, str(neighval)])
        neighfit = MetaUtils.fitness("%s_%s/log" % (self.logd, neighval))
        sys.stderr.write("new fitness: %s " % str(neighfit))
        self.k_values.append((neighval, neighfit))
        # stop once the recorded fitness history shows a local maximum
        if MetaUtils.localmax([fit for _, fit in self.k_values], 3):
            MetaUtils.report(self.k_values)
            sys.exit(0)
        currval, currfit = neighval, neighfit
def neighbour(self, v):
    """Return the next candidate value for v; the step size depends on the
    recent fitness trend (move_by_step1) and on which switch is in use."""
    fits = [fit for _, fit in self.k_values]
    big_step = MetaUtils.move_by_step1(fits, STDDEV_CNT, STDDEV)
    if self.switch == "-n":
        # -n grows multiplicatively, rounded up to an integer
        factor = EX_STEP1 if big_step else EX_STEP2
        return int(math.ceil(v * factor))
    # other switches grow additively
    return v + (LEN_STEP1 if big_step else LEN_STEP2)
def __init__(self, cmd, logd, depth): self.cmd = cmd self.logd = logd self.k_values = [] currd = depth fitw, currf = self.sinbad_seq(currd, WGTS) self.k_values.append((currd, fitw, currf)) while True: msg = "\n==> current depth: %s, fitness: %s\n" % (currd, currf) sys.stderr.write(msg) neighd = self.neighbourd(self.k_values, currd) fitw, neighf = self.sinbad_seq(neighd, WGTS) self.k_values.append((neighd, fitw, neighf)) print "------" if MetaUtils.localmax([f for _,_,f in self.k_values]): fitd, maxf = self.best(self.k_values) print "Max ambiguities found: %s" % maxf print "Best depth, weights: " for d,w in fitdw: print "%s : %s " % (d,w) sys.exit(0) currd, currf = neighd, neighf
def get_booker_cc(self):
    """function to get the booker country for an account

    Returns:
        a panda dataframe with booker country
    """
    # campaign id -> campaign name mapping, read straight from mysql
    campaign_query = """
        SELECT id AS campaign_id, name AS campaign_name
        FROM mrktctl.account_1Bid_Campaign
    """
    conn = mu.getConnSql()
    conn.execute("USE mrktctl")
    campaigns = pd.read_sql_query(campaign_query, con=conn)
    conn.close()
    # pos -> campaign id mapping: in account_1 a campaign may cover
    # one pos or several pos values
    pos_map = spark.table("spmeta.account_1_cc_campaign") \
        .withColumnRenamed("cc", "pos") \
        .toPandas()
    # TR is served by a dedicated campaign id (105)
    pos_map.loc[pos_map.pos == "TR", "campaign_id"] = 105
    return campaigns.merge(pos_map)
def get_id_pos(self):
    """get campaign id, campaign correspondence from mysql database and
    join with the filtered campaign lists

    returns: spark dataframe
    """
    conn = mu.getConnSql()
    conn.execute('USE mrktctl')
    query = """
        SELECT id campaign_id, name pos
        FROM account_1Bid_Campaign"""
    campaigns_pd = pd.read_sql_query(query, con=conn)
    conn.close()
    campaigns = spark.createDataFrame(campaigns_pd)
    # ['All'] is the sentinel for "no pos filtering"
    if self.pos == ['All']:
        return campaigns
    wanted = spark.createDataFrame(
        pd.DataFrame(data=self.pos, columns=["pos"]))
    return campaigns.join(wanted, on="pos", how="inner")
def sinbad(self, d, wgt): w = wgt d_cmd = ["-d", str(d)] w_cmd = ["-w", str(w)] MetaUtils.runtool(self.cmd + d_cmd + w_cmd) logp = "%s_-d_%s_-w_%s/log" % (self.logd, d, w) f = MetaUtils.fitness(logp) w_values = [(w,f)] while True: sys.stderr.write("\n(d, w, f) => (%s, %s, %s)\n" % \ (str(d), str(w), str(f))) neighw = self.neighbourw(w_values, w) w_cmd = ["-w", str(neighw)] MetaUtils.runtool(self.cmd + d_cmd + w_cmd) logp = "%s_-d_%s_-w_%s/log" % (self.logd, d, neighw) neighf = MetaUtils.fitness(logp) w_values.append((neighw,neighf)) fits = [f for _,f in w_values] if MetaUtils.localmax(fits): for k,f in w_values: print "(%s,%s)" % (k,f) fitw,maxf = self.best_w(w_values) msg = "\n%s ambiguities found" % maxf msg += "\nd=%s, w=%s\n" % (d, fitw) sys.stderr.write(msg) return fitw, maxf w, f = neighw, neighf
def main():
    """
    Main function for when this module is called as a program.

    Determines the next-level output file name for the given input file
    (or file-list file) and target program, printing it on success.
    Returns 0; error paths terminate via sys.exit() with a message.

    Improvements: idiomatic `not in` membership tests (were
    `not x in y`) and removal of a dead commented-out check.
    """
    ret_status = 0
    clopts, inp_name, targ_prog = get_command_line_data()
    if targ_prog not in PROCESSABLE_PROGRAMS:
        err_msg = 'Error! The target program, "{0}", is not known.'.\
            format(targ_prog)
        sys.exit(err_msg)
    if os.path.exists(inp_name):
        try:
            file_typer = get_obpg_file_type.ObpgFileTyper(inp_name)
            ftype, sensor = file_typer.get_file_type()
            if ftype == 'unknown':
                if MetaUtils.is_ascii_file(inp_name):
                    # Try treating the input file as a file list file.
                    data_files_info = get_data_files_info(inp_name)
                    if len(data_files_info) > 0:
                        next_level_name = get_multifile_next_level_name(
                            data_files_info, targ_prog, clopts)
                    else:
                        err_msg = "Error! No OBPG files found in {0}".\
                            format(inp_name)
                        sys.exit(err_msg)
                else:
                    # The input file wasn't a file list file.
                    err_msg = "File {0} is not an OBPG file.".format(inp_name)
                    sys.exit(err_msg)
            else:
                # The file is an OBPG file
                stime, etime = file_typer.get_file_times()
                file_metadata = file_typer.attributes
                data_file = obpg_data_file.ObpgDataFile(
                    inp_name, ftype, sensor, stime, etime, file_metadata)
                next_level_name = get_1_file_name(data_file, targ_prog, clopts)
            print('Output Name: ' + next_level_name)
        except SystemExit as sys_ex:
            # The intention here is to catch exit exceptions we throw in other
            # parts of the program and continue with the exit, outputting
            # whatever error message was created for the exit.
            sys.exit(sys_ex)
        except:
            # Deliberately broad: anything unexpected is routed to the
            # central handler with full exception info.
            handle_unexpected_exception(sys.exc_info())
    else:
        err_msg = "Error! File {0} was not found.".format(inp_name)
        sys.exit(err_msg)
    return ret_status
def get_cancellations(self):
    """get cancellation data from mysql at desired aggregated dimension and
    filter for the selected point of sales.

    Returns:
        spark dataframe with cancelled commission, cancelled bookings and
        cancelled roomnights at desired aggregated dimension
    """
    conn = mu.getConnSql()
    conn.execute('USE ppc_sp')
    cancellation_query = """
        SELECT r.date_cancelled yyyy_mm_dd
             , a.distribution pos
             , CAST(coalesce(r.dest_id, r.hotel_id) AS INT) hotel_id
             , CAST(sum(1) AS INT) cancellations
             , sum(commission_amount_euro) cancelled_commission
             , CAST(sum(roomnights) AS INT) cancelled_roomnights
        FROM {reservation_table} r force index (cancel)
        JOIN {affiliate_table} a on (a.affiliate_id = r.affiliate_id)
        WHERE r.date_cancelled >= '{start_date}'
          AND r.date_cancelled < '{end_date}'
          AND r.status not in ('fraudulent','test','unknown')
          AND r.partner_id = {account_4_partner_id}
        GROUP BY yyyy_mm_dd, pos, coalesce(r.dest_id, r.hotel_id)
    """.format(reservation_table=self.reservation_table,
               affiliate_table=self.affiliate_table,
               start_date=self.start_date,
               end_date=self.end_date,
               account_4_partner_id=self.partner_id)
    raw = pd.read_sql_query(cancellation_query, con=conn)
    conn.close()
    cancellations = spark.createDataFrame(raw)
    # roll up to the requested aggregation dimension
    agg = cancellations.groupBy(*self.agg_on).agg(
        f.sum("cancelled_commission").alias("cancelled_commission"),
        f.sum("cancelled_roomnights").alias("cancelled_roomnights"),
        f.sum("cancellations").alias("cancellations"))
    # ['All'] is the sentinel for "no pos filtering"
    if self.pos == ['All']:
        return agg
    wanted = spark.createDataFrame(
        pd.DataFrame(data=self.pos, columns=["pos"]))
    return agg.join(wanted, on="pos", how="inner")
def run(self):
    """Hill-climb over token length (-k), recording (length, fitness) pairs
    until MetaUtils.localmax fires, then report and exit."""
    currlen = self.length
    MetaUtils.runtool(self.cmd + ["-k", str(currlen)])
    currfit = MetaUtils.fitness("%s_%s/log" % (self.logd, currlen))
    self.k_values.append((currlen, currfit))
    while True:
        sys.stderr.write("\n==> current length: %s, fitness: %s\n" \
            % (str(currlen), str(currfit)))
        neighlen = self.neighbour(currlen)
        MetaUtils.runtool(self.cmd + ["-k", str(neighlen)])
        neighfit = MetaUtils.fitness("%s_%s/log" % (self.logd, neighlen))
        sys.stderr.write("new fitness: %s " % str(neighfit))
        self.k_values.append((neighlen, neighfit))
        # stop once the fitness history shows a local maximum over NBEST
        if MetaUtils.localmax([fit for _, fit in self.k_values], NBEST):
            MetaUtils.report(self.k_values)
            sys.exit(0)
        currlen, currfit = neighlen, neighfit
def __init__(self, cmd, logd, depth): self.cmd = cmd self.logd = logd self.k_values = [] currd = depth currf = self.sinbad(currd) self.k_values.append((currd, currf)) while True: msg = "\n==> current depth: %s, fitness: %s\n" % (currd, currf) sys.stderr.write(msg) neighd = self.neighbour(self.k_values, currd) neighf = self.sinbad(neighd) self.k_values.append((neighd, neighf)) print "------" if MetaUtils.localmax([f for _,f in self.k_values], NBEST): fitd, maxf = self.best(self.k_values) print "Max ambiguities found: %s" % maxf print "Best depths: " , fitd sys.exit(0) currd, currf = neighd, neighf
def neighbourw(self, key_fits, w):
    """Return the next candidate weight: scale w by the large step while the
    recent fitness trend warrants it, otherwise by the small step."""
    fits = [fit for _, fit in key_fits]
    if MetaUtils.move_by_step1(fits, NNEIGH, STDDEV):
        factor = WGT_STEP1
    else:
        factor = WGT_STEP2
    return w * factor
def neighbourd(self, key_fits, d):
    """Return the next candidate depth: advance d by the large or small step
    depending on the recent fitness trend."""
    fits = [fit for _, _, fit in key_fits]
    step = D_STEP1 if MetaUtils.move_by_step1(fits, NNEIGH, STDDEV) else D_STEP2
    return d + step
def sinbad(self, d):
    """Run the tool at depth d and return the fitness read from its log."""
    MetaUtils.runtool(self.cmd + ["-d", str(d)])
    logpath = "%s_-d_%s/log" % (self.logd, d)
    return MetaUtils.fitness(logpath)
def neighbour(self, l):
    """Return the next candidate length, stepped by the large or small
    increment depending on the recent fitness trend."""
    fits = [fit for _, fit in self.k_values]
    step = LEN_STEP1 if MetaUtils.move_by_step1(fits, NNEIGH, STDDEV) else LEN_STEP2
    return l + step