def bankrisk(innercode, checkdate):
        innercodeCalc, result = ca.reshape_innercode(innercode=innercode, checkdate=checkdate, cache=ca.bankRiskCache)
        #innercodeCalc=innercode
        result = df(result,columns=('innercode', 'es'))

        if len(innercodeCalc) != 0:
            query="""SELECT
                        financedata.bank_fin_prd.INNER_CODE,
                        financeprd.bank_risk.VAR
                    FROM
                        financedata.bank_fin_prd,
                        financeprd.bank_risk
                    WHERE
                        financedata.bank_fin_prd.INNER_CODE IN ( %s )
                    AND
                        financeprd.bank_risk.BANK_ID = financedata.bank_fin_prd.BANK_ID """ % ','.join(innercodeCalc)
            result_new = GetDataFromDB(config, query)
            result_new.columns = ['innercode', 'es']
            innercode_empty=df(columns=['innercode', 'es'])
            innercode_empty['innercode']=list(set(innercodeCalc)-set(result_new['innercode'].astype(str)))
            innercode_empty['es']=''
            innercode_empty.columns=['innercode','es']
            result_new=result_new.append(innercode_empty,ignore_index=1)
            #save cache for new data
            ca.set_cache(result_new=result_new, checkdate=checkdate,cache=ca.bankRiskCache)

            result=result.append(result_new,ignore_index=1)
            result['checkdate'] = checkdate
        else:
            result['checkdate'] = checkdate
        return (result.to_json(orient='index'))
Example #2
    def __init__(self, niimg, xy=None, alpha=.5, ch_user=None, fit=True,
                 cmap='magma'):
        from nilearn._utils import check_niimg
        if isinstance(niimg, str):
            niimg = check_niimg(niimg)  # load the image when a file path is passed
        # Store params
        self.fit = fit
        self.alpha = alpha
        self.xy = xy

        # Read MRI data
        data = niimg.get_data()
        affine = niimg.affine

        # Setup MRI viewer
        self.axes = list()
        # ---- Horizontally stack the three views
        widths = np.array(np.abs([affine[1, -1], affine[0, -1],
                                  affine[0, -1]]), int)
        cum_width = 0
        for ii, width in enumerate(widths):
            ax = plt.subplot2grid((1, sum(widths)), (0, cum_width),
                                  colspan=width)
            self.axes.append(ax)
            cum_width += width
        plt.subplots_adjust(hspace=0, wspace=0, left=0, right=1.0, top=1.,
                            bottom=0.1)
        self.viewer = OrthoSlicer3D(data, affine=affine, cmap=cmap,
                                    axes=self.axes)
        # ----  Each view has a particular x, y, depth order
        self.ax_xyd = ([1, 2, 0], [0, 2, 1], [0, 1, 2])

        # Interactive components
        ax.get_figure().canvas.mpl_connect('key_press_event', self._press)

        # channel locations
        columns = (['handle_%i' % ii for ii in range(3)] +
                   ['x', 'y', 'z', 'x_idx', 'y_idx', 'z_idx'])
        self.ch_user = df(columns=columns) if ch_user is None else ch_user
        self.ch_pred = df(columns=columns)

        # Scatter plot handles
        plt.gcf().canvas.mpl_connect('motion_notify_event', self._draw)
        self._last_refresh = time()  # XXX to refresh every 100 ms max

        # Grid GUI
        self.grid = GridGUI(ax=plt.axes([0.1, 0., 0.1, 0.1]), xy=xy)

        # Setup surface fitting
        self.model = ModelSurface(alpha=alpha, verbose=None)

        self._init_add()
        plt.show()
def process_lock(args,junction):
    global table
    name = args.replace("/","_")
    dir = "pickle/" + name +  "/"
    path = "C:/Users/IBM_ADMIN/Documents/Disseration/Figures/"
    data = df().from_csv(dir + "data.csv")
    
    result = db.peak_weekday.find({"_id":args}).sort("_id", -1) 
    hour = [0,0]
    
    global weatherstore
    loc = df()
    data["IDUBLINC2_dailyrainMM"] = weatherstore["IDUBLINC2"]["dailyrainMM"]
    data["IDUBLINC2_TemperatureC"] = weatherstore["IDUBLINC2"]["TemperatureC"]
    point = wd.stations_coordinates("IDUBLINC2")
    loc["IDUBLINC2_distance"] = jh.distance_between_junction(junction, point)
    data["ILEINSTE8_dailyrainMM"] = weatherstore["ILEINSTE8"]["dailyrainMM"]
    data["ILEINSTE8_TemperatureC"] = weatherstore["ILEINSTE8"]["TemperatureC"]
    point = wd.stations_coordinates("ILEINSTE8")
    loc["ILEINSTE8_distance"] = jh.distance_between_junction(junction, point)
    data["ICODUBLI2_dailyrainMM"] = weatherstore["ICODUBLI2"]["dailyrainMM"]
    data["ICODUBLI2_TemperatureC"] = weatherstore["ICODUBLI2"]["TemperatureC"]
    point = wd.stations_coordinates("ICODUBLI2")
    loc["ICODUBLI2_distance"] = jh.distance_between_junction(junction, point)
    
    
    data = data.dropna()
    for res in result:
        h = res["item"][0]["hour"]
        hour[0] = str(h) + ":00"
        hour[1] = str((h)) + ":59"
        
    try:
        data = data.between_time(hour[0],hour[1]).resample('B').dropna()
    except Exception: 
        data = data.resample('B').dropna()
    
    ts1 = pd.Series(data["STT"].values.squeeze())
    ts_rain = {}
    #ts_temp = {}
    ts_rain["IDUBLINC2_dailyrainMM"] = pd.Series(data["IDUBLINC2_dailyrainMM"].values.squeeze())
    ts_rain["IDUBLINC2_TemperatureC"] = pd.Series(data["IDUBLINC2_TemperatureC"].values.squeeze())
    ts_rain["ILEINSTE8_dailyrainMM"] = pd.Series(data["ILEINSTE8_dailyrainMM"].values.squeeze())
    ts_rain["ILEINSTE8_TemperatureC"] = pd.Series(data["ILEINSTE8_TemperatureC"].values.squeeze())
    ts_rain["ICODUBLI2_dailyrainMM"] = pd.Series(data["ICODUBLI2_dailyrainMM"].values.squeeze())
    ts_rain["ICODUBLI2_TemperatureC"] = pd.Series(data["ICODUBLI2_TemperatureC"].values.squeeze())
    global columns
    #result = autocorrelation_plot(ts1)
    row = [np.corrcoef(ts1.values,ts_rain[a].values)[0,1] 
                        for a in ts_rain.keys()]
    table.append(row)
    columns=ts_rain.keys()
    records.append(args.replace("/","_")) 
def extract_linear_regression_data(filepath):
    # read csv into pandas DF
    # insert a column of ones into x
    # take furthest right column as y
    # initialize theta to 0's
    data = pd.read_csv(filepath, header=None)
    x= df(data[list(data.columns)[:-1]]).astype('float')
    x.insert(0,None,1)
    x.columns = range(len(x.columns)) 
    y= df(data[data.shape[1]-1]).astype('float')
    y.columns = [0]
    theta = df(0,index=range(len(x.columns)),columns=range(1)).astype('float')
    return x,y,theta
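# A minimal, self-contained check of extract_linear_regression_data above. The CSV
# text is made up for illustration; pandas.read_csv accepts a file-like object,
# so io.StringIO stands in for a real file path.
import io

csv_text = "1.0,2.0,5.0\n2.0,3.0,8.0\n"    # two feature columns, one target column
x, y, theta = extract_linear_regression_data(io.StringIO(csv_text))
# x gains a leading column of ones for the intercept, so theta has three rows.
print(x.shape, y.shape, theta.shape)        # (2, 3) (2, 1) (3, 1)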
Example #5
def unstack_me(fid, cols, dcols, outfile):
    """Unstacks data and writes to file"""
    data = pd.read_excel(fid)
    unstacked = df()
    leaders = df(data, columns=cols)
    for dc in dcols:
        temp = df(leaders)
        temp['Analyte'] = dc
        temp['Result'] = data[dc]
        # temp['Result'] = temp.Result.apply(fixred)
        unstacked = unstacked.append(temp, ignore_index=True)

    excelwriter(outfile, unstacked)
Example #6
def unstack_frame(frame, cols, dcols):
    """Unstacks dataframe and returns unstacked df

       cols: columns that will be repeated with sample info
       dcols: data columns for unstacking that will be individually appended to cols
    """
    data = frame
    unstacked = df()
    leaders = df(data, columns=cols)
    for dc in dcols:
        temp = df(leaders)
        temp['Analyte'] = dc
        temp['Result'] = data[dc]
        # temp['Result'] = temp.Result.apply(fixred)
        unstacked = unstacked.append(temp, ignore_index=True)
    return unstacked
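# For reference, a small sketch of what unstack_frame produces; the wide-format
# frame and its column names below are made up. pandas' built-in melt performs
# the same wide-to-long reshaping natively.
import pandas as pd

frame = pd.DataFrame({"SampleID": ["s1", "s2"],
                      "Lead": [0.1, 0.2],
                      "Zinc": [1.5, 1.7]})
# Equivalent to unstack_frame(frame, cols=["SampleID"], dcols=["Lead", "Zinc"]):
long_form = frame.melt(id_vars=["SampleID"], value_vars=["Lead", "Zinc"],
                       var_name="Analyte", value_name="Result")
print(long_form)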
    def calc_probabilities(self, data):
        """
        The method returns the selection probability associated with the
        the different choices.

        Inputs:
        data - DataArray object
        """
        [shape_param] = [1, ] * genlogistic.numargs
        observed_utility = self.calc_observed_utilities(data)
        num_choices = self.specification.number_choices
        probabilities = df(columns=self.choices,  index=data.index)
        lower_bin = 0
        for i in range(num_choices - 1):
            value = self.thresholds[i] - observed_utility
            if self.distribution == 'logit':
                upper_bin = genlogistic.cdf(value, shape_param)
            else:
                upper_bin = norm.cdf(value)
            choice = self.choices[i]
            probabilities.loc[:, choice] = upper_bin - lower_bin
            lower_bin = upper_bin
        choice = self.choices[i+1] #Last ordered choice 
        probabilities.loc[:, choice] = 1 - upper_bin
        return probabilities
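# A standalone numeric illustration of the binning logic in calc_probabilities:
# each ordered choice receives the difference between the CDF evaluated at
# successive thresholds (shifted by the observed utility), and the last choice
# absorbs the remaining tail. The threshold and utility values are invented.
from scipy.stats import norm

thresholds = [-0.5, 0.8]            # two thresholds -> three ordered choices
observed_utility = 0.3
lower_bin, probs = 0.0, []
for t in thresholds:
    upper_bin = norm.cdf(t - observed_utility)
    probs.append(upper_bin - lower_bin)
    lower_bin = upper_bin
probs.append(1.0 - upper_bin)       # last ordered choice
print(probs, sum(probs))            # probabilities sum to 1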
def run():
    cursor = db.junctions.find({"_id":"30/7/1"})
    json_str =json_util.dumps(cursor)
    junctions =json_util.loads(json_str)
    junctions = sorted(junctions, key=lambda k: k['route']) 
    d1 = None
    for junction in junctions:
        d1 = process(junction["_id"])
        
    cursor = db.junctions.find({"_id":"13/2/1"})
    json_str =json_util.dumps(cursor)
    junctions =json_util.loads(json_str)
    junctions = sorted(junctions, key=lambda k: k['route']) 
    d2 = None
    for junction in junctions:
        d2 = process(junction["_id"])
    
    cursor = db.junctions.find({"_id":"17/6/1"})
    json_str =json_util.dumps(cursor)
    junctions =json_util.loads(json_str)
    junctions = sorted(junctions, key=lambda k: k['route']) 
    d3 = None
    for junction in junctions:
        d3 = process(junction["_id"])
        #d3.plot()
    
    d = df({
            "Low - 30/7/1": [d1["STT"].quantile(i/100) for i in range(1,99,10)],
            "Medium - 13/2/1":[d2["STT"].quantile(i/100) for i in range(1,99,10)],
            "High - 17/6/1":[d3["STT"].quantile(i/100) for i in range(1,99,10)]
            })
    d.plot(ylim=[0,600])
    plt.show()
def main():
    #create the training & test sets, skipping the header row with [1:]
    dataset = genfromtxt('D:/train.csv', delimiter=',', dtype=np.str_)[1:]

    print ('loaded train')

    target = [x[1] for x in dataset]
    train = [x[2:5].tolist() + x[14:].tolist() for x in dataset]

    #Clear the variable to release the memory.
    dataset = None

    dataset = genfromtxt('D:/test.csv', delimiter=',', dtype=np.str_)[1:]
    test_id = [x[0] for x in dataset]
    test = [x[1:4].tolist() + x[13:].tolist() for x in dataset]

    print ('loaded test')

    #create and train the random forest
    rf = RandomForestClassifier(n_estimators=100, n_jobs=16)
    rf.fit(train, target)

    print ("done fitting")

    columns_obj = ["id", "click"]
    list_obj = list(zip(test_id,rf.predict(test).tolist()))
    df_obj = df(list_obj, columns=columns_obj)

    savetxt('D:/submission.csv', df_obj, delimiter=',', fmt='%s')
    print ("prediction complete")
def process_lock(args):
    cursor = db.junctions.find({"_id":args})
    
    rain = getseriesweather('dailyrainMM',"mean","IDUBLINC2")
    wind = getseriesweather('WindSpeedGustKMH',"mean","IDUBLINC2")
    temperature = getseriesweather('TemperatureC',"mean","IDUBLINC2")
    #json_str =json_util.dumps(cursor)
    #junctions =json_util.loads(json_str)
    #neighbours1 = list(db.junctions.find({"junction2.point":junctions[0]["junction1"]["point"]}))
    #neighbours2 = list(db.junctions.find({"junction1.point":junctions[0]["junction2"]["point"]}))
    series = []
    neighbours = []
    #neighbours.extend(neighbours1)
    #neighbours.extend(neighbours2)
    #print("1",neighbours1)
    #print("2",neighbours2)    
    

    arg = args.split("/")
    series1 = {"route":arg[0],"link":arg[1],"direction":arg[2]}
    selected_series = getseries(series1)
    #for n in neighbours:
        #if not n["direction"] == series1["direction"]:
            #if not (n["route"] + "/" + n["link"]) == (series1["route"] + "/" + series1["link"]):
                #series.append(getseries({"route": n["route"],"link": n["link"],"direction": n["direction"]}))

    
    shift = 10*6*24

    ds = {"STT":selected_series}
    
    dframe = df(ds)
def getseriesweather(location):
    global data
    print(data)
    if data is None:
        data = db.weather.find({"location":location}).sort("_id", -1)
        #json_str =json_util.dumps(data)
        #data =json_util.loads(json_str)
        
    dates = []
    dailyrainMM = []
    windSpeedGustKMH = []
    Humidity = []
    HourlyPrecipMM = []
    TemperatureC = []
    
    for res in (data):
        for res2 in (res['item']):
            dailyrainMM.append(float(res2["dailyrainMM"]))
            windSpeedGustKMH.append(float(res2["WindSpeedGustKMH"]))
            Humidity.append(float(res2["Humidity"]))
            HourlyPrecipMM.append(float(res2["HourlyPrecipMM"]))
            TemperatureC.append(float(res2["TemperatureC"]))
            dates.append(datetime.strptime(res2['Time'],'%Y-%m-%d %H:%M:%S'))
    
    dframe = df({"dailyrainMM":TimeSeries(dailyrainMM,dates), 
                "WindSpeedGustKMH":TimeSeries(windSpeedGustKMH,dates), 
                "Humidity":TimeSeries(Humidity,dates), 
                "HourlyPrecipMM":TimeSeries(HourlyPrecipMM,dates),
                "TemperatureC":TimeSeries(TemperatureC,dates)})
    dframe.to_csv("c:/result.csv")
    
    return dframe
def create_submission(events,yw,yb):
    print "Preparing submission file...."
    submission = df()
    submission["Event"] = events
    submission["WhiteElo"] = yw
    submission["BlackElo"] = yb
    submission.to_csv("submission.csv",index=False)
Example #13
 def get_grid(self, astype='table'):
     from pandas import DataFrame as df
     geoms = self.geometries().keys()
     phases = [p.name for p in self.phases().values() if not hasattr(p, 'mixture')]
     grid = df(index=geoms, columns=phases)
     for r in grid.index:
         for c in grid.columns:
             phys = self.find_physics(phase=self[c], geometry=self[r])
             if phys is not None:
                 grid.loc[r][c] = phys.name
             else:
                 grid.loc[r][c] = '---'
     if astype == 'pandas':
         pass
     elif astype == 'dict':
         grid = grid.to_dict()
     elif astype == 'table':
         from terminaltables import SingleTable
         headings = [self.network.name] + list(grid.keys())
         g = [headings]
         for row in list(grid.index):
             g.append([row] + list(grid.loc[row]))
         grid = SingleTable(g)
         grid.title = 'Project: ' + self.name
         grid.padding_left = 3
         grid.padding_right = 3
         grid.justify_columns = {col: 'center' for col in range(len(headings))}
     elif astype == 'grid':
         grid = ProjectGrid()
     return grid
def recursive_add_consumers(consumer_id, seen = set([])):
        if consumer_id is None:
            return

        seen.add(consumer_id)
        consumer_key = sample[sample.Consumer == consumer_id]
        IP = df.drop_duplicates(df(consumer_key.IP))

        n = np.array(np.arange(len(IP)))

        IP_Map = set([])
        for i in n:
          value = sample[sample.IP.isin([IP.iloc[i,0]])]
          IP_Map.add(value)

        #print IP_Map

        print(consumer_id)
        print(seen)
        consumer_list = []

        #list of unique consumers that are linked to this one
        [consumer_list.extend(y.Consumer.iloc[l].tolist()) for l in [range(len(y.Consumer)) for y in IP_Map]]

        #print consumer_list
        #print [x for x in set(consumer_list).difference([consumer_id])]
        #unique_consumer_list = []
        #print [ x for x in set([y.Consumer.iloc[0] for y in IP_Map])]


        #tuples of ips and unique consumers attached to them
        print([(y.IP.iloc[0], set(y.Consumer.iloc[l].tolist())) for l in [range(len(y.Consumer)) for y in IP_Map]])
def data_result(args):
    name = args.replace("/","_")
    dir = "pickle/" + name +  "/"
    path = "C:/Users/IBM_ADMIN/Documents/Disseration/Figures/"
    data = df().from_csv(dir + "data.csv")
    data = data.resample('B').dropna()
    return data
Example #16
def to_dataframe(data_array):
    dataframe = df(data_array)
    dataframe.columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
    dataframe['datetime'] = dataframe.timestamp.apply(
        lambda x: pandas.to_datetime(datetime.fromtimestamp(x / 1000).strftime('%c')))
    dataframe.set_index('datetime', inplace=True, drop=True)
    dataframe.drop('timestamp', axis=1, inplace=True)
    return dataframe
def prepare_for_pawn(source_df, cols_to_include):
    to_handle = source_df.copy(deep=True)
    to_handle = to_handle[to_handle["ActionType_Pawn"].notnull()]
    cols_to_merge = ["Matter_Tablet", "Matter_Phone"]
    join_cols(to_handle, cols_to_merge)
    cols_to_merge = ["Matter_Other", "Matter_TV"]
    join_cols(to_handle, cols_to_merge)
    return df(to_handle, columns=cols_to_include)
def IP_Weight_Calc(consumer_id):

        if consumer_id == None:
            return
        consumer_key = sample[sample.Consumer == consumer_id]

        IP = df.drop_duplicates(df(consumer_key.IP))

        n = np.array(np.arange(len(IP)))
        IP_Weight_List = []

        for i in n:
            value = sample[sample.IP.isin([IP.iloc[i,0]])]
            value2 = len(df.drop_duplicates(df(value.Consumer)))
            value3 = (1/(value2**2))
            IP_Weight_List.append(value3)

        return sum(IP_Weight_List)
Example #19
 def get_beta(self):
     # doing johanson test and return 
     data = df()
     for i in range(len(self.__instrumentList)) :
         data[self.__instrumentList[i]] = np.asarray(self.__ds[self.__instrumentList[i]][-1*self.__windowSize:])
     jres = johansen.coint_johansen(data[self.__instrumentList], 0, 1)
     result = dict()
     for i in range(len(self.__instrumentList)) :
         result[self.__instrumentList[i]] = jres.evec[i,0]
     return result
def process_lock(args):
    cursor = db.junctions.find({"_id":args})
    
    rain = getseriesweather('dailyrainMM',"mean","IDUBLINC2")
    wind = getseriesweather('WindSpeedGustKMH',"mean","IDUBLINC2")
    temperature = getseriesweather('TemperatureC',"mean","IDUBLINC2")
    #json_str =json_util.dumps(cursor)
    #junctions =json_util.loads(json_str)
    #neighbours1 = list(db.junctions.find({"junction2.point":junctions[0]["junction1"]["point"]}))
    #neighbours2 = list(db.junctions.find({"junction1.point":junctions[0]["junction2"]["point"]}))
    series = []
    neighbours = []
    #neighbours.extend(neighbours1)
    #neighbours.extend(neighbours2)
    #print("1",neighbours1)
    #print("2",neighbours2)    
    

    arg = args.split("/")
    series1 = {"route":arg[0],"link":arg[1],"direction":arg[2]}
    selected_series = getseries(series1)
    #for n in neighbours:
        #if not n["direction"] == series1["direction"]:
            #if not (n["route"] + "/" + n["link"]) == (series1["route"] + "/" + series1["link"]):
                #series.append(getseries({"route": n["route"],"link": n["link"],"direction": n["direction"]}))

    
    shift = 10*6*24

    ds = {"STT":selected_series,
                 "Wind":wind,
                 "Rain":rain,
                 "Temperature":temperature,
                 "STT1":ewma(selected_series.shift(shift), span=1+shift),
                 "STT2":ewma(selected_series.shift(shift), span=2+shift),
                 "STT3":ewma(selected_series.shift(shift), span=3+shift),
                 "Wind1":ewma(wind.shift(shift), span=3+shift),
                 "Temperature1":ewma(temperature.shift(shift), span=3+shift),
                 "Rain1":ewma(rain.shift(shift), span=3+shift)
                 }
    
    dframe = df(ds)
    
    
        
    
    t_list = list(['STT1','STT2','STT3','Rain','Wind','Temperature','Rain1','Wind1','Temperature1'])
    for i,s in enumerate(dframe.columns.values):
        dframe[s].fillna(method="pad", inplace=True) 
    
    train_df = dframe["2013-01-01":"2014-04-17"].resample('H', how="max",convention='end',fill_method='pad').copy()
    test_df = train_df.copy()["2013-09-01":"2014-02-15"]
    train_df = train_df.copy()["2013-02-16":"2014-04-17"]
    train_df.to_csv("pickle/" + args.replace("/","_") +  "/" + "training_data.csv")
    test_df.to_csv("pickle/" + args.replace("/","_") +  "/" + "testing_data.csv")
Example #21
    def append_data(self, df_obj):
        """Appends dataframe obj to ScrapeData data

           Should be a setter, need to refactor"""
        try:
            df_obj = self.__setup_data(df_obj)
            cols = self.data.columns
            for_import = df(df_obj, columns=cols)
            return self.data.append(for_import, ignore_index = True)
        except KeyError:
            print('Something is non-standard, prep data manually')
 def resolve_consistency(self, data, seed, numberProcesses):
     pschedulesGrouped = data.data.groupby(level=[0,1], sort=False)   
     
     verts = df(columns=self.colNames)
     verts[self.hidName] = pschedulesGrouped[self.hidName].min()
     verts[self.pidName] = pschedulesGrouped[self.pidName].min()
     verts[self.starttimeName] = pschedulesGrouped[self.starttimeName].min()
     verts[self.endtimeName] = pschedulesGrouped[self.endtimeName].max()  
     
     return DataArray(verts.values, self.colNames, 
                               indexCols=[self.hidName, self.pidName])
     """
def run():
    cursor = db.junctions.find()
    json_str =json_util.dumps(cursor)
    junctions =json_util.loads(json_str)
    junctions = sorted(junctions, key=lambda k: k['route']) 
    route = [int(j["route"]) for j in junctions]
    link = [int(j["link"]) for j in junctions]
    direction = [int(j["direction"]) for j in junctions]
    d = df({"route":route,
            "link":link,
            "direction":direction})
    print(d.describe())
def process(args):
    
    #['ICODUBLI2','ILEINSTE8','IDUBLINC2']
    
    dframe1 = getseriesweather("ICODUBLI2")["2014-01-24":"2014-01-29"].resample("H", how="mean",convention='end',fill_method="pad")
    dframe2 = getseriesweather("ILEINSTE8")["2014-01-24":"2014-01-29"].resample("H", how="mean",convention='end',fill_method="pad")
    dframe3 = getseriesweather("IDUBLINC2")["2014-01-24":"2014-01-29"].resample("H", how="mean",convention='end',fill_method="pad")
    dframe = df({"ICODUBLI2":dframe1,"ILEINSTE8":dframe2,"IDUBLINC2":dframe3})
    
    dframe.plot()
    print(dframe.corr())
    plt.show()
 def __init__(self, data=None, varnames=None, index=None, indexCols=None):
     # TODO:index
     if varnames is not None:
         for varname in varnames:
             self.check_varname(varname)
     else:
         varnames = []
     try:
         self.data = df(data, columns=varnames, index=index)
     except Exception as e:
         raise DataError("""Error creating the data frame object
                           with the dataset:%s""" % e)
Example #26
 def run(self, flatten=True):
     if not self.quiet:
         print("running with collection: %s, query: %s, project: %s" % (str(self.col), self.query, self.project))
     c = self.col.find(self.query, self.project)
     self.res = []
     for item in c:
         if flatten:
             self.res.append(flatten_dict(item))
         else:
             self.res.append(item)
     self.res = df(self.res)
     self.__cleanup()
Example #27
 def get_neighbour(self,id):
     if self.matrix == "":
         cursor = self.db.junctions.find()
         json_str =json_util.dumps(cursor)
         junctions =json_util.loads(json_str)
         junctions = sorted(junctions, key=lambda k: k['route']) 
         matrix = []
         headers = [y['_id'] for y in junctions]
         for x in junctions:
             matrix.append([int(jh.is_neighbour(x, y)) for y in junctions])
         self.matrix = df(matrix, columns=headers, index=headers)
     return self.matrix[id]
def run():
    cursor = db.junctions.find()
    json_str =json_util.dumps(cursor)
    junctions =json_util.loads(json_str)
    #os.remove(r'weather_correlation_lagged.csv')
    junctions = sorted(junctions, key=lambda k: k['route']) 
    for junction in junctions[:4]:
        process(junction["_id"],junction)
    data = df(table,columns=columns)
    data["_index"] = records
    
    data.dropna().to_csv("spatial_correlation_lagged.csv")   
def process_lock(args):
    print(args)
    dir = args.split("/")
    pickle_dir = "pickle/" + args.replace("/","_")
    if not os.path.exists(pickle_dir):
        os.makedirs(pickle_dir)
    series1 = {"route":dir[0],"link":dir[1],"direction":dir[2]}
    selected_series = getseries(series1)
    ds = {"STT":selected_series}
    dframe = df(ds)
    
    dframe.to_csv(pickle_dir +  "/" + "data.csv")
def prepare_df(X, Y, output_mode="binary", pred_hours=[8], min_ob_window = 4):

    X_out, y_out, hours_left = cut_data(X, output_mode = output_mode, pred_hours = pred_hours, min_ob_window= min_ob_window)

    print(len(X_out), len(hours_left), len(y_out))

    out_df = df({'X': X_out,
                 'hours_pass': [ h.shape[0] for h in X_out ],
                 # 'ylos': Y,
                 'hours_left': hours_left,
                 'y': y_out})  # .sort_values(by ='hours')

    return out_df
Example #31
                tail = True
                head = True
        # fill in the price list
        for link in bsObject.find_all("span", {"class": "text-muted small"}):
            value = int(link.text.strip().replace('(',
                                                  "").replace(',', "").replace(
                                                      '원)', ""))
            # compute the total price
            if i == 1:
                value = value * 2
            elif i == 2:
                value = value * 3
                if value % 10 == 1:
                    value = value - 1
                elif value % 10 == 9:
                    value = value + 1
            elif i == 3:
                value = value * 4
            elif i == 4:
                value = value * 5
            price_list.append(value)

# create the DataFrame
db = df(data={'Product': product_list, 'Sale': sale_list, 'Price': price_list})

# convert to CSV
db.to_csv(csv_name + ".csv",
          mode="w",
          header=False,
          index=False,
          encoding='utf-8')
Example #32
data = {
    'id': ['a1', 'a2', 'a3', 'a4', 'a5'],
    'x1': [1, 2, 3, 4, 5],
    'x2': [3.0, 4.5, 3.2, 4.0, 3.5]
}
#df=DataFrame(data)
#print(df)

#df=DataFrame(data)
#df.index=df['id']
#df.pop('id')
#print(df)

df_1 = df(data=np.arange(12).reshape(3, 4),
          index=['r0', 'r1', 'r2'],
          dtype='int',
          columns=['c0', 'c1', 'c2', 'c3'])
df_2 = df(
    {
        'class_1': ['a', 'a', 'b', 'b', 'c'],
        'var_1': np.arange(5),
        'var_2': np.random.randn(5)
    },
    index=['r0', 'r1', 'r2', 'r3', 'r4'])
#print(df_2)

#print(df_2.columns)
#print(df_2[['class_1', 'var_2']])

idx = ['r0', 'r1', 'r2', 'r3', 'r4']
df_1 = df({
Example #33
# Format = [
# ['H','F','F'etc.], (listes_genre)
# [0, 1, 2, etc.] (femmes_counter_array)
# [0.0, 0.5, 0.67, etc.] (proportion_counter)
# ]
for parti_liste in listes_genre:
    femmes_counter_array = []
    proportion_counter = []
    femmes_counter = 0
    for i in range(len(parti_liste)):
        if parti_liste[i] == 'F':
            femmes_counter += 1
        femmes_counter_array.append(femmes_counter)
        proportion_counter.append(femmes_counter / (i+1))
    listes_genre_et_counter.append((parti_liste, femmes_counter_array,proportion_counter))

partis_dfs = []
for i in range(len(listes_genre_et_counter)):
    parti_tableau = {}
    parti_grosses_listes = listes_genre_et_counter[i]
    for j in range(len(parti_grosses_listes)):
        categorie_liste = parti_grosses_listes[j]
        nom_colonne = (' '.join([categories[j],headers[i]]))
        parti_tableau[nom_colonne] = categorie_liste
    partis_dfs.append(df(parti_tableau))

import pandas as pd
t = pd.concat(partis_dfs, axis=1)
t['Classement des circonscriptions'] = range(1, len(t) + 1)
t.set_index('Classement des circonscriptions').to_csv('../gt.csv')
Example #34
 def make_beam(self,
               xi_distr,
               r_distr,
               pz_distr,
               ang_distr,
               Ipeak_kA,
               q_m=1.0,
               partic_in_layer=200,
               saveto='./',
               name='beamfile.bin'):
     """make_beam(xi_shape, r_shape, pz_shape, ang_shape, Ipeak_kA, N_partic=10000, q_m=1.0, partic_in_layer = 200, saveto='./')"""
     if q_m == 1 and Ipeak_kA > 0:
         print('Electrons must have negative current.')
         return
     if xi_distr.med > 0:
         print('Beam center is in xi>0.')
     try:
         partic_in_layer = self.beam_partic_in_layer
     except:
         print('Variable partic_in_layer is not found. Default value: 200.')
     try:
         xi_step = self.xi_step
         r_size = self.r_size
     except:
         xi_step = 0.01
         r_size = 10
         print(
             'Variable xi_step or r_size is not found. Default values: xi_step = %.3f, r_size = %3f'
             % (xi_step, r_size))
     if saveto and 'beamfile.bin' in os.listdir(saveto):
         print(
             'Another beamfile.bin is found. You may delete it using the following command: "!rm %s".'
             % os.path.join(saveto, name))
         return
     I0 = 17  # kA
     q = 2. * Ipeak_kA / I0 / partic_in_layer
     stub_particle = np.array([[-100000., 0., 0., 0., 0., 1.0, 0., 0.]])
     gamma = pz_distr.med
     N = 10000
     while True:
         xi = xi_distr(N)
         print('Trying', N, 'particles')
         xi = xi[(-self.xi_size <= xi)]  # & (xi <= 0)]
         if np.sum((xi_distr.med - xi_step / 2 < xi)
                   & (xi < xi_distr.med + xi_step / 2)) < partic_in_layer:
             print(
                 N, 'is not enough:',
                 np.sum((xi_distr.med - xi_step < xi)
                        & (xi < xi_distr.med)))
             N *= 10
             continue
         until_middle_layer_filled = [
             np.cumsum((xi_distr.med - xi_step < xi) & (xi < xi_distr.med))
             <= partic_in_layer
         ]
         xi = xi[until_middle_layer_filled]
         K = xi.shape[0]
         print(K, 'is enough')
         xi = np.sort(xi)[::-1]
         r = np.abs(r_distr(K))
         pz = pz_distr(K)
         pr = gamma * ang_distr(K)
         M = gamma * ang_distr(K) * r
         particles = np.array([
             xi, r, pz, pr, M, q_m * np.ones(K), q * np.ones(K),
             np.arange(K)
         ])
         beam = np.vstack([particles.T, stub_particle])
         break
     beam = df(beam, columns=['xi', 'r', 'pz', 'pr', 'M', 'q_m', 'q', 'N'])
     head = beam[beam.eval('xi>0')]
     beam = beam[beam.eval('xi<=0')]
     #beam.sort_values('xi', inplace=True, ascending=False)
     if saveto:
         beam.values.tofile(os.path.join(saveto, name))
         head.values.tofile(os.path.join(saveto, 'head-' + name))
     return beam
    # The tfidfvector trained on the person's corpus contains terms by index
    p_vector = persons_vocab[person]['tfidfvec']

    # The sparse term matrix resulting from vector training contains term weights by index
    # Sparse term matrices are defined by an array where row and column coordinates of only those cells with values are known.
    # array[k] = [row[k], col[k]] = data[k]
    # In the case of a sparse matrix generated by a tfidfvector from a corpus:
    # Each row is a document
    # Each column is a term
    # Each cell is a term weight.
    p_term_matrix = persons_vocab[person]['term_matrix']

    # Now to just extract the terms and weights and add them to the persons_weighted_terms dictionary 
    for doc in persons_vocab[person]['term_matrix']:
        pass  # per-document extraction of terms and weights was left unwritten here

    # Finally, add to the list of weighted terms.
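# A minimal sketch of the extraction described in the comments above, using a
# fitted scikit-learn TfidfVectorizer. The tiny corpus is a stand-in; the real
# persons_vocab structure from this script is only assumed, not reproduced.
from sklearn.feature_extraction.text import TfidfVectorizer

corpus = ["the cat sat", "the cat ran"]
tfidfvec = TfidfVectorizer()
term_matrix = tfidfvec.fit_transform(corpus)   # sparse: rows = docs, cols = terms

terms = tfidfvec.get_feature_names_out()       # column index -> term string
weighted_terms = {}
coo = term_matrix.tocoo()                      # expose (row, col, data) triples
for row, col, weight in zip(coo.row, coo.col, coo.data):
    weighted_terms.setdefault(terms[col], []).append(float(weight))
print(weighted_terms)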


## vocabs_df = pd.DataFrame([{},])
## vocabs_df = vocabs_df.set_index('')

#cols = word_counts.keys()
vocabs_df = pd.DataFrame()


# Standardize the data    

# Perform dimensionality reduction by PCA

# Export the resulting dataframe
Example #36
 def create_df():
     return df(columns=('ID', 'element_type', 'time', 'x_f', 'y_f', 'z_f', 'x_s', 'y_s', 'z_s', 'distance',
                        'ceiling_lvl', 'profile', 'u_x', 'u_y', 'u_z', 'HRRPUA', 'alpha'))
Example #37
 } for i in table_columns_post],
 data=table_data,
 row_selectable='multi',
 fixed_rows={'headers': True},
 selected_rows=[0],
 sort_action='native',
 style_cell={
     'textAlign': 'left',
     'width': '40px'
 },
 style_table={
     'height': '300px',
     'overflowY': 'auto'
 },
 style_data_conditional=discrete_background_color_bins(
     df(data=table_data), columns=table_columns_quintiles) +
 discrete_background_color_bins(df(data=table_data),
                                columns=table_columns_places,
                                dark_color='Greens') +
 [
     {
         'if': {
             'filter_query':
             '{Index} < 0',  # matching rows of a hidden column with the id, `id`
             'column_id': 'Entry'
         },
         'backgroundColor': 'rgb(255,248,220)'
     },
     {
         'if': {
             'column_id': 'Entry'
Example #38
from bs4 import BeautifulSoup
from urllib.request import urlopen
from pandas import DataFrame as df

response = urlopen(
    'http://cu.bgfretail.com/product/productAjax.do?pageIndex=1&searchMainCategory=10&searchSubCategory=3&listType=0&searchCondition=setA&searchUseYn=N&gdIdx=0&codeParent=10&user_id=&search2=&searchKeyword='
)
soup = BeautifulSoup(response, 'html.parser')

a = []
b = []
c = []
for price in soup.select('p.prodPrice'):
    a.append(price.get_text())
    #print(price.get_text())
for name in soup.select('p.prodName'):
    b.append(name.get_text())
    #print(name.get_text())
for img_url in soup.select('img[src*=".jpg"]'):
    c.append(img_url.get('src', '/'))
    #print(img_url.get('src', '/'))

df1 = df({"prodName": list(b), "prodPrice": list(a), "img_url": list(c)})

df1.to_csv('CU_ham.csv', index=False)
Example #39
                    listSuptNameVal.append(pcfSuptName)
                    listSuptUciVal.append(pcfSuptUci)
                    listSuptMtlVal.append(pcfSuptMtlList)
                    pcfSuptName = ""
                    pcfSuptUci = ""
                    pcfSuptMtlList = ""
                    iB = 0
                else:
                    iB = 0

            if iA == 0:
                print('File {} has no reference ID.'.format(tagerFile))
                break

except Exception:
    print('An error occurred while processing file {}.'.format(tagerFile))

data = {
    'SUPPORT NAME': listSuptNameVal,  #input excel column name
    'SUPPORT UCI': listSuptUciVal,  #input excel column name
    'SUPPORT MATERIAL LIST': listSuptMtlVal,  #input excel column name
    'WBS ISO': listRefVal,  #input excel column name
    'ATTRIBUTE30': listAtt30Val,  #input excel column name
    'ATTRIBUTE34': listAtt34Val
}  #input excel column name
pcfDF = df(data)
writer = ExcelWriter('PCF_output.xlsx')
pcfDF.to_excel(writer, 'PCF', index=False)
writer.save()
print("완료")
Example #40
#
# # Use the proportion of anomalous samples in the training data to derive a threshold for plotting
# data_1 = pd.concat([data, scores_pred_df], axis=1, join_axes=[data.index])
# print(data_1.head(3))
# print('')
#
# data_2 = data_1[data_1['scores_pred'] > threshold]
# print("IsolationForest删除之后数据量: ", data_2.shape[0])
# print('')
# data_final = data_2.drop(["scores_pred"], axis=1)
# print(data_final.head(3))
# print('')
# data_final.to_csv('pag_with_dummy_if1.txt', index = False)
#
# print(data_final.head())
pag_with_dummy_if1 = df(pd.read_csv('pag_with_dummy_if1.txt'))
data_final = pag_with_dummy_if1.drop(["device_id"], axis=1)
#StandardScaler crude data
ss = StandardScaler()
data_final_=ss.fit_transform(data_final)
data_final_regular = pd.DataFrame(data_final_)

# find out k
SSE = []
for k in range(1, 10):
    estimator = KMeans(n_clusters=k)  # build the clustering estimator
    estimator.fit(data_final_regular)
    SSE.append(estimator.inertia_)  # estimator.inertia_ is the total within-cluster sum of squares
    print("k = ", k, " SSE = ",estimator.inertia_)
# print(SSE)
# print(X.shape())
Example #41
def prepareInputs(daydata, season, UsedInputs):
    nbrInputs = 0

    previousHours = UsedInputs[0]
    previousDay = UsedInputs[1]
    previousWeek = UsedInputs[2]
    temp = UsedInputs[3]
    tempMax = UsedInputs[4]
    tempMin = UsedInputs[5]
    dayIndicator = UsedInputs[6]

    if previousHours == True: nbrInputs = nbrInputs + 1
    if previousDay == True: nbrInputs = nbrInputs + 1
    if previousWeek == True: nbrInputs = nbrInputs + 1
    if temp == True: nbrInputs = nbrInputs + 1
    if tempMax == True: nbrInputs = nbrInputs + 1
    if tempMin == True: nbrInputs = nbrInputs + 1
    if dayIndicator == True: nbrInputs = nbrInputs + 7

    hourclusters = np.empty([(daydata.index.size * 24), 1])

    hourdataindex = pd.DataFrame(
        index=pd.date_range('2014-1-8 00:00:00', periods=(365) * 24, freq='H'))

    for x in range(0, daydata.index.size):
        for y in range(0, 24):
            hourclusters[(x * 24) + y, 0] = daydata.iloc[x, 24]
    hourclusters.size

    tempAlgiers = pd.read_csv('../data/tempAlgiers.csv')
    tempA = tempAlgiers.loc[:, 'Hour_1':'Hour_24']
    tempnp = np.array(tempA)
    tempnp = tempnp.reshape(-1, 1)
    tempdata = pd.DataFrame(tempnp)

    tempmax = tempAlgiers.loc[:, 'Tmax']
    tempmin = tempAlgiers.loc[:, 'Tmin']

    tempmx = np.random.random([tempmax.size * 24, 1])
    tempmn = np.random.random([tempmin.size * 24, 1])

    for x in range(0, tempmax.size):
        for y in range(0, 24):
            tempmx[(x * 24) + y, 0] = tempmax.iloc[x]

    for x in range(0, tempmin.size):
        for y in range(0, 24):
            tempmn[(x * 24) + y, 0] = tempmin.iloc[x]

    samples = daydata.index.size * 24
    daydata2 = daydata.copy()
    del (daydata2['cluster'])

    data = pd.DataFrame(np.array(daydata2).reshape(-1, 1))

    maxcons = data.values.max()
    mincons = data.values.min()

    maxtemp = np.max(tempdata.values)
    mintemp = tempdata.values.min()

    maxtempmax = np.max(tempmx)
    mintempmax = np.min(tempmx)

    maxtempmin = np.max(tempmn)
    mintempmin = np.min(tempmn)

    sigxx = np.empty((samples - 168, nbrInputs))
    sigyy = np.empty((samples - 168, 1))

    i = 0
    for x in list(range(168, samples)):
        i = 0
        if previousHours == True:
            sigxx[x - 168, i] = (data.iloc[x - 1, 0]) / (2 * maxcons)
            i = i + 1
        if previousDay == True:
            sigxx[x - 168, i] = (data.iloc[x - 24, 0]) / (2 * maxcons)
            i = i + 1
        if previousWeek == True:
            sigxx[x - 168, i] = (data.iloc[x - 168, 0]) / (2 * maxcons)
            i = i + 1
        if temp == True:
            sigxx[x - 168, i] = (tempdata.iloc[x]) / (2 * maxtemp)
            i = i + 1
        if tempMax == True:
            sigxx[x - 168, i] = (tempmx[x]) / (2 * maxtempmax)
            i = i + 1
        if tempMin == True:
            sigxx[x - 168, i] = (tempmn[x]) / (2 * maxtempmin)
            i = i + 1
        if dayIndicator == True:
            ind = 0
            for y in range(0, 7):
                sigxx[x - 168, i + ind] = 0
                ind = ind + 1
            sigxx[x - 168, i + pd.datetime.weekday(hourdataindex.index[x])] = 1

    for x in list(range(168, samples)):
        sigyy[x - 168, 0] = (data.iloc[x, 0]) / (2 * maxcons)

    sigmoidxx = df(sigxx.copy())
    sigmoidyy = df(sigyy.copy())

    sigmoidxx.index = pd.date_range('2014-1-8 00:00:00',
                                    periods=(365 - 7) * 24,
                                    freq='H')
    sigmoidyy.index = pd.date_range('2014-1-8 00:00:00',
                                    periods=(365 - 7) * 24,
                                    freq='H')

    sigmoidxx['cluster'] = hourclusters[168:]
    sigmoidyy['cluster'] = hourclusters[168:]
    dfhourclusters = df(hourclusters)

    temp1 = sigmoidyy[sigmoidyy.cluster == 0]
    temp2 = sigmoidyy[sigmoidyy.cluster == 1]
    temp3 = sigmoidyy[sigmoidyy.cluster == 2]

    if season == 'summer':
        if temp1.index[0] == pd.datetime(2014, 4, 9, 0, 0, 0):
            SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 0].copy()
        elif temp2.index[0] == pd.datetime(2014, 4, 9, 0, 0, 0):
            SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 1].copy()
        elif temp3.index[0] == pd.datetime(2014, 4, 9, 0, 0, 0):
            SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 2].copy()
    elif season == 'winter':
        if temp1.index[0] == pd.datetime(2014, 1, 8, 0, 0, 0):
            SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 0].copy()
        elif temp2.index[0] == pd.datetime(2014, 1, 8, 0, 0, 0):
            SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 1].copy()
        elif temp3.index[0] == pd.datetime(2014, 1, 8, 0, 0, 0):
            SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 2].copy()
    elif season == 'spring and autumn':
        if temp1.index[0] == pd.datetime(2014, 3, 18, 0, 0, 0):
            SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 0].copy()
        elif temp2.index[0] == pd.datetime(2014, 3, 18, 0, 0, 0):
            SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 1].copy()
        elif temp3.index[0] == pd.datetime(2014, 3, 18, 0, 0, 0):
            SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 2].copy()

    SigmoidOutputs = sigmoidyy[sigmoidyy.cluster == SigmoidInputs.loc[
        SigmoidInputs.index[0], 'cluster']]
    del (SigmoidInputs['cluster'], SigmoidOutputs['cluster'])

    learningoutputs = pd.DataFrame(
        SigmoidOutputs.iloc[:int(SigmoidOutputs.size - 168)].values.copy(),
        index=SigmoidOutputs.iloc[:int(SigmoidOutputs.size - 168)].index)
    testoutputs = pd.DataFrame(
        SigmoidOutputs.iloc[int(SigmoidOutputs.size - 168):].values.copy(),
        index=SigmoidOutputs.iloc[int(SigmoidOutputs.size - 168):].index)

    learninginputs = pd.DataFrame(
        SigmoidInputs.iloc[:int(SigmoidOutputs.size - 168)].values.copy(),
        index=SigmoidOutputs.iloc[:int(SigmoidOutputs.size - 168)].index)
    testinputs = pd.DataFrame(
        SigmoidInputs.iloc[int(SigmoidOutputs.size - 168):].values.copy(),
        index=SigmoidOutputs.iloc[int(SigmoidOutputs.size - 168):].index)

    print('-------Input preparation process complete-------')
    return learninginputs, learningoutputs, testinputs, testoutputs, nbrInputs
Example #42
from sklearn import tree
import graphviz

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

dataset = read_csv('realdata3.csv')
modifiedData = dataset.fillna(np.NaN)
print(modifiedData.head(5))
d = modifiedData



d1 = fancyimpute.MICE().complete(d)
newd=df(data = d1, index = d.index, columns= list(d.columns))
newd.to_csv('test2.csv')
values = newd.values
outcome_var = 'BAD'
model = tree.DecisionTreeClassifier(criterion = "entropy", max_depth = 7, min_samples_split=500, min_samples_leaf=500)
predictor_var = ['LOAN', 'MORTDUE','REASON' , 'VALUE','DELINQ', 'DEROG' ,'CLAGE','Other','DELINQ', 'Office' ,'Sales', 'ProfExe']
X = values[: , range(18)[1:]]
Y = values[:,0]
model = LogisticRegression()
rfe = RFE(model, 6)
fit = rfe.fit(X, Y)
print("Num Features: %d" % fit.n_features_)
print("Selected Features: %s" % fit.support_)
print("Feature Ranking: %s" % fit.ranking_)

Example #43
for tr in trList_1:
    if 'View=' + managed_entity_view_name in tr[1].text:
        exportID_1 = tr[2].text

url_me_1 = url_managed_entity_1 + exportID_1
r_me_1 = requests.get(url_me_1)
tree_me_1 = etree.fromstring(r_me_1.content, parser=parser)
trList_me_1 = tree_me_1.xpath('//tr')

me_app_mapping = []

for tr in trList_me_1:
    if len(tr.getchildren()) == 9:
        if len(tr.getchildren()[2].getchildren()) == 1:
            if tr.getchildren()[2].getchildren()[0].text != None:
                attr_str = tr.getchildren()[8].getchildren()[0].text
                if key_attribute + '=' in attr_str:
                    for attr in attr_str.split(','):
                        if key_attribute + '=' in attr:
                            app_name = attr.split('=')[1].strip()
                else:
                    app_name = None
                mapping = {
                    'managedEntity': tr.getchildren()[2].getchildren()[0].text,
                    'AppName': app_name
                }
                me_app_mapping.append(mapping)

dfMapping = df(me_app_mapping)
dfMapping.head()
Example #44
confirmed_names = []
deaths_names = []
def names_column(frame, lst): #Makes a new column called Name
    for i in range(len(frame)):
        if type(frame['Province/State'][i]) is str:
            lst.append(frame['Province/State'][i])
        else:
            lst.append(frame['Country/Region'][i])
    frame['Name'] = df(lst)

names_column(confirmed, confirmed_names)
names_column(deaths, deaths_names)

# confirmed['Name'] = df(confirmed_names)
# deaths['Name'] = df(deaths_names)
confirmed['confirmed_size'] = df(confirmed_size)
deaths['death_size'] = df(death_size)

map_confirmed = go.Scattermapbox(
        customdata=confirmed[yesterdays_date],
        name='Confirmed Cases',
        lon=confirmed['Long'],
        lat=confirmed['Lat'],
        text=confirmed['Name'],
        hovertemplate=
            "<b>%{text}</b><br>" +
            "Confirmed Cases: %{customdata}<br>" +
            "<extra></extra>",
        mode='markers',
        showlegend=True,
Example #45
    
    # if last batch in laset subset and smaller than batch size, ignore
    if len(imlist) < config_inference.BATCH_SIZE:
        print("Batch is smaller than batch size (last batch?), won't do inference on it")
        continue

    # Detect objects in this batch of images
    r = model.detect(imlist, verbose=0)    
        
    # Init df to save coords for this batch
    Annots_DF = df(columns= [
        "unique_nucleus_id",
        "slide_name", 
        "nucleus_label", 
        "nucleus_label_confidence",
        "fov_offset_xmin_ymin", 
        "roi_offset_xmin_ymin", 
        "center_relative_to_slide_x_y",
        "bounding_box_relative_to_slide_xmin_ymin_xmax_ymax",
        "boundary_relative_to_slide_x_coords",
        "boundary_relative_to_slide_y_coords",
    ])
    
    # ==================================================================
    # Save coords for all instances in each image in batch

    for imidx in range(len(r)):

        # Extract image info
        iminfo = dataset.image_info[idx_start:idx_end][imidx]

        # convert to three channels
Example #46
    '과학': 70
}, {
    '국어': 63,
    '영어': 60,
    '수학': 31,
    '과학': None
}, {
    '국어': 23,
    '영어': 48,
    '수학': None,
    '과학': 69
}]

# The dictionary keys become the column names
# With a list of dictionaries like this, only the index needs to be specified separately
data = df(grade_dic, index=['철수', '영희', '민철', '수현', '호영'])
print(data)
print('-' * 40)

# 1. Single condition
# => use a basic comparison expression
# Find students whose Korean (국어) score is above 80
result = data.query('국어 > 80')
print(result)
print('-' * 40)

# 2. Using an 'and' condition
# Students whose Korean (국어) score is above 80 and whose math (수학) score is also above 80
result = data.query('국어 > 80 and 수학> 80')
print(result)
print('-' * 40)
Example #47
import MySQLdb as my
import pandas as pd
from sqlalchemy import create_engine as ce
from pandas import DataFrame as df

ef1 = pd.read_csv('/home/ai21/Desktop/common/Python_Exercises/emp.csv',
                  header=None,
                  names='name empno desig salary deptcode'.split())

ef1['deptcode'] = [121, 122, 123, 121, 121, 123, 122, 121, 121, 124]
print(ef1)

ce1 = ce("mysql://*****:*****@127.0.0.1/ai")
#ef1.to_sql("ai_21_emp", ce1)

dic1={ 'dept_no':[121,122,123,124],\
'dept_name':['CSE','ECE','MECH','IT'],\
'dept_location':['B2F0','B3F2','B1F1','B3F1'] }

df1 = df(dic1, columns=['dept_no', 'dept_name', 'dept_location'])
#df1.to_sql("ai_21_dep", ce1)

con = my.connect('127.0.0.1', 'ai', 'ai', 'ai')
ef2 = pd.read_sql("select * from ai_21_emp", con)
df2 = pd.read_sql("select * from ai_21_dep", con)

df3 = ef2.merge(df2, left_on='deptcode',
                right_on='dept_no')[['empno', 'name', 'dept_name',
                                     'salary']].sort_values(['salary'])
print(df3)
#df3.to_sql("ai_21_emp_dep", ce1)
def ret_all_courses(str_input):
    # initializes the file object with a text file that maps each career to a major
    f = open(os.path.join("", "app", "all_text_files", "list_careers.txt"), "r", encoding="utf-8")
    all_text = f.read()
    f.close()
    list_lines = all_text.split("\n")
    jobs = []
    majors = []
    # initializing a list of jobs and list of majors
    for each in list_lines:
        parts = each.split("*")
        if len(parts) == 2:
            job = parts[0].strip()
            jobs.append(job)
            major = parts[1].strip()
            majors.append(major)

    # checks if the string entered is present in job list
    if str_input in jobs:
        course = []
        desc = []
        list_desc = []
        fuzzy = []
        # finds the index for the given job
        index = jobs.index(str_input)
        # finds the corresponding major
        major = majors[index]
        # string manipulation for the file name
        if ":" in major:
            parts = major.split(":")
            major = parts[0] + parts[1]
        f = open(os.path.join("majors", major.lower() + ".html"), encoding="utf-8")
        h_text = f.read()
        f.close()
        # using a soup object to search for list of courses from the website of the major
        soup = BeautifulSoup(h_text, "html.parser")
        all_a = soup.find_all("a", {'class': "bubblelink code"})
        for each in all_a:
            course_text = clean_up_text(each.get_text()).replace("\u200b", "")
            desc_text = get_desc_text(course_text)
            if desc_text is None:
                print(course_text, "None!")
                continue
            if course_text[-3:].isnumeric():
                if course_text[:2] == "or":
                    course.append(course_text[2:-3] + " " + course_text[-3:])
                else:
                    course.append(course_text[:-3] + " " + course_text[-3:])
                desc.append(desc_text)
                # using fuzzy to assign the ratio of the match which is used to sort the amount overlap
                fuzzy.append(fuzz.token_sort_ratio(str_input, desc_text))
        # finding a more detailed description in all_data.csv
        for i in range(len(desc)):
            flag = 1
            for j in range(len(csv_data)):
                if cell(j, "Name").lower() in desc[i].lower():
                    flag = 0
                    list_desc.append(cell(j, "Description"))
                    break
            if flag == 1:
                list_desc.append("no desc found")
        # constructing the dataframe
        dict_vals = {"course": course, "name": desc, "description": list_desc, "fuzz": fuzzy}
        df_courses = df(dict_vals)
        # sorting by the amount of match
        df_courses = df_courses.sort_values(by=["fuzz"], ascending=[False])
        df_courses = df_courses.reset_index(drop=True)
        df_courses = df_courses.drop_duplicates(subset=["course"], keep="first")
        df_courses = df_courses.reset_index(drop=True)
        # returning those rows where the amount of match is greater than 30
        return df_courses[df_courses["fuzz"] > 30]
    else:
        print("job not in jobs list")
    # Count of patent: 927223

    dict_subcls_list = json.load(
        open(WORK_DIR + '/data_json/subclass_list_ini.json'))
    print(dict_subcls_list.keys())
    print_count_subcls_list(dict_subcls_list)

    tmp = data[data['subclass'].isin(
        dict_subcls_list['subclass_Svc.'])]['patent_no'].values
    pat_no_list = list(set(tmp))
    data_pat_Svc = data[data['patent_no'].isin(pat_no_list)]
    data_pat_Svc.head()

    tmp = data_pat_Svc.groupby('subclass').count()
    print("Count of subclass: {}".format(len(tmp)))
    df_gby_cls = df({'subclass': tmp.index.values})
    df_gby_cls['count_pat'] = tmp[tmp.columns[-1]].values
    df_gby_cls.head()

    df_gby_cls.describe()
    """           count_pat
        count     727.000000
        mean     1052.097662
        std     13667.482842
        min         1.000000
        25%         2.500000
        50%        18.000000
        75%       104.000000
        max    357936.000000    
    """
    df_gby_cls.plot.hist(bins=100, log=True)
Example #50
def generateline(stocknumber, Type, startdate, enddate, interval):
    startdata = startdate.encode("ascii").replace("/", "-").replace(
        "\n", "")  #convert to tushare readable date
    enddata = enddate.encode("ascii").replace("/", "-").replace("\n", "")
    array = df()
    #print startdata
    #print enddata
    current_time = time.strftime("%Y/%m/%d")
    if Type == "分笔".decode("utf-8"):
        if startdate != current_time:
            array = ts.get_tick_data(stocknumber, date=startdata)  # tick-by-tick data (分笔)
            if array is None:
                return
            array = array.sort_values("time")
            date = array["time"].tolist()
            amount = array["amount"].tolist()
            atype = array["type"].tolist()
            price = array["price"].tolist()
            flag = ["bar" for i in date]
            for idx, val in enumerate(atype):  # if it is a sell order (卖盘), make the amount negative
                if val == "卖盘":
                    amount[idx] = -amount[idx]
                if val == "中性盘":  # if it is a neutral order (中性盘), ignore it. Might have a problem with this part??
                    amount[idx] = 0
            returnarray = zip(date, amount, flag, price)
            return returnarray
        else:
            array = ts.get_today_ticks(stocknumber)  # Tushare handles today's ticks and historical ticks separately
            if array is None:
                return
            array = array.sort_values("time")
            date = array["time"].tolist()
            amount = array["amount"].tolist()
            atype = array["type"].tolist()
            flag = ["bar" for i in date]
            for idx, val in enumerate(atype):
                if val == "卖盘".decode("utf-8"):
                    amount[idx] = -amount[idx]
                if val == "中性盘".decode("utf-8"):
                    amount[idx] = 0
            returnarray = zip(date, amount, flag)
            return returnarray

    if interval != "qfq" and interval != "hfq":  # regular historical K-line data
        if Type != "Kline":
            array = ts.get_k_data(stocknumber,
                                  start=startdata,
                                  end=enddata,
                                  ktype=interval)
            if array is None:
                return
            Type1 = firstletter(Type).encode("ascii")
            target = array[Type1].tolist()
            date = array["date"].tolist()
            returnarray = zip(date, target)
            return returnarray
        else:
            array = ts.get_k_data(stocknumber,
                                  start=startdata,
                                  end=enddata,
                                  ktype=interval)
            if array is None:
                return
            Date = array["date"].tolist()
            Open = array["open"].tolist()
            Close = array["close"].tolist()
            High = array["high"].tolist()
            Low = array["low"].tolist()
            Candlestick = zip(*[Date, Open, Close, Low, High])
            return Candlestick
    else:
        if Type != "Kline":  # 复权
            array = ts.get_h_data(stocknumber,
                                  start=startdata,
                                  end=enddata,
                                  autype=interval)
            if array is None:
                return
            Type1 = firstletter(Type).encode("ascii")
            array = array.sort_index()
            target = array[Type1].tolist()
            date = array.index.format()
            returnarray = zip(date, target)
            return returnarray
        else:
            array = ts.get_h_data(stocknumber,
                                  start=startdata,
                                  end=enddata,
                                  autype=interval)
            if array is None:
                return
            array = array.sort_index()
            Date = array.index.format()
            Open = array["open"].tolist()
            Close = array["close"].tolist()
            High = array["high"].tolist()
            Low = array["low"].tolist()
            Candlestick = zip(*[Date, Open, Close, Low, High])
            return Candlestick
Example #51
def process_features(my_df, type, params):
    try:
        if not os.path.isdir(params.cache_dir):
            os.makedirs(params.cache_dir)
        if type == "train":
            dict = hkl.load(
                open(str(os.path.join(params.cache_dir, "train_features.hkl")),
                     "r"))
            return dict["X"], dict["y_white"], dict["y_black"]
        elif type == "test":
            dict = hkl.load(
                open(str(os.path.join(params.cache_dir, "test_features.hkl")),
                     "r"))
            return dict["X"]
    except:
        X = df()
        #Global
        X["MaxMoveScore"] = my_df["MoveScores"].apply(np.max)
        X["MinMoveScore"] = my_df["MoveScores"].apply(np.min)
        X["RangeMoveScore"] = X["MaxMoveScore"] - X["MinMoveScore"]
        X["IQRMoveScore"] = my_df["MoveScores"].apply(safe_iqr)
        X["MedianMoveScore"] = my_df["MoveScores"].apply(np.median)
        X["STDMoveScore"] = my_df["MoveScores"].apply(np.std)
        X["GameLen"] = my_df["MoveScores"].apply(len)
        X["MeanMoveScore"] = my_df["MoveScores"].apply(np.mean)
        X["ModeMoveScore"] = my_df["MoveScores"].apply(
            mode, axis=0).apply(lambda x: x[0][0])
        # X["ModeRound10MoveScore"] = my_df["MoveScores"].apply(lambda x:np.round(x,-1)).apply(mode,axis=0).apply(lambda x:x[0][0])
        X["SumMoveScore"] = my_df["MoveScores"].apply(np.sum)
        X["BlunderCount"] = my_df["MoveScores"].apply(catch_blunders)
        X["Results"] = my_df["Results"]
        # White Scores
        X["WhiteMaxMoveScore"] = my_df["WhiteMoveScores"].apply(np.max)
        X["WhiteMinMoveScore"] = my_df["WhiteMoveScores"].apply(np.min)
        X["WhiteRangeMoveScore"] = X["WhiteMaxMoveScore"] - X[
            "WhiteMinMoveScore"]
        X["WhiteIQRMoveScore"] = my_df["WhiteMoveScores"].apply(safe_iqr)
        X["WhiteMedianMoveScore"] = my_df["WhiteMoveScores"].apply(safe_median)
        X["WhiteSTDMoveScore"] = my_df["WhiteMoveScores"].apply(safe_std)
        X["WhiteMeanMoveScore"] = my_df["WhiteMoveScores"].apply(safe_mean)
        X["WhiteModeMoveScore"] = my_df["WhiteMoveScores"].apply(
            safe_mode).apply(lambda x: x[0][0])
        X["WhiteSumMoveScore"] = my_df["WhiteMoveScores"].apply(np.sum)
        X["WhiteBlunderCount"] = my_df["WhiteMoveScores"].apply(catch_blunders)
        # Black Scores
        X["BlackMaxMoveScore"] = my_df["BlackMoveScores"].apply(np.max)
        X["BlackMinMoveScore"] = my_df["BlackMoveScores"].apply(np.min)
        X["BlackRangeMoveScore"] = X["BlackMaxMoveScore"] - X[
            "BlackMinMoveScore"]
        X["BlackIQRMoveScore"] = my_df["BlackMoveScores"].apply(safe_iqr)
        X["BlackMedianMoveScore"] = my_df["BlackMoveScores"].apply(safe_median)
        X["BlackSTDMoveScore"] = my_df["BlackMoveScores"].apply(safe_std)
        X["BlackMeanMoveScore"] = my_df["BlackMoveScores"].apply(safe_mean)
        X["BlackModeMoveScore"] = my_df["BlackMoveScores"].apply(
            safe_mode).apply(lambda x: x[0][0])
        X["BlackSumMoveScore"] = my_df["BlackMoveScores"].apply(np.sum)
        X["BlackBlunderCount"] = my_df["BlackMoveScores"].apply(catch_blunders)
        #White Advantage
        #X["WhiteAdvantageMaxMoveScore"] = my_df["MoveScores"].apply(lambda x:filter(lambda elem:elem >= 0,x)).apply(safe_max)  #Useless
        X["WhiteAdvantageMinMoveScore"] = my_df[
            "WhiteAdvantageMoveScores"].apply(safe_min)
        X["WhiteAdvantageRangeMoveScore"] = X["MaxMoveScore"] - X[
            "WhiteAdvantageMinMoveScore"]
        X["WhiteAdvantageIQRMoveScore"] = my_df[
            "WhiteAdvantageMoveScores"].apply(safe_iqr)
        X["WhiteAdvantageMedianMoveScore"] = my_df[
            "WhiteAdvantageMoveScores"].apply(safe_median)
        X["WhiteAdvantageSTDMoveScore"] = my_df[
            "WhiteAdvantageMoveScores"].apply(safe_std)
        X["WhiteAdvantageCount"] = my_df["WhiteAdvantageMoveScores"].apply(len)
        X["WhiteAdvantageMeanMoveScore"] = my_df[
            "WhiteAdvantageMoveScores"].apply(safe_mean)
        X["WhiteAdvantageModeMoveScore"] = my_df[
            "WhiteAdvantageMoveScores"].apply(safe_mode).apply(
                lambda x: x[0][0])
        # X["WhiteAdvantageModeRound10MoveScore"] = my_df["MoveMoveScores"].apply(lambda x:filter(lambda elem:elem >= 0,x)).apply(lambda x:np.round(x,-1)).apply(safe_mode).apply(lambda x:x[0][0])

        #Black Advantage
        X["BlackAdvantageMaxMoveScore"] = my_df[
            "BlackAdvantageMoveScores"].apply(safe_max)
        #X["BlackAdvantageMinMoveScore"] = my_df["MoveMoveScores"].apply(lambda x:filter(lambda elem:elem < 0,x)).apply(safe_min)   #Useless
        X["BlackAdvantageRangeMoveScore"] = X[
            "BlackAdvantageMaxMoveScore"] - X["MinMoveScore"]
        X["BlackAdvantageIQRMoveScore"] = my_df[
            "BlackAdvantageMoveScores"].apply(safe_iqr)
        X["BlackAdvantageMedianMoveScore"] = my_df[
            "BlackAdvantageMoveScores"].apply(safe_median)
        X["BlackAdvantageSTDMoveScore"] = my_df[
            "BlackAdvantageMoveScores"].apply(safe_std)
        X["BlackAdvantageCount"] = my_df["BlackAdvantageMoveScores"].apply(len)
        X["BlackAdvantageMeanMoveScore"] = my_df[
            "BlackAdvantageMoveScores"].apply(safe_mean)
        X["BlackAdvantageModeMoveScore"] = my_df[
            "BlackAdvantageMoveScores"].apply(safe_mode).apply(
                lambda x: x[0][0])
        # X["BlackAdvantageModeRound10MoveScore"] = my_df["BlackAdvantageScores"].apply(lambda x:np.round(x,-1)).apply(safe_mode).apply(lambda x:x[0][0])

        #Partitioning

        ## All moves
        X["AllMoveScoresPartitionLen"] = my_df["Partition0MoveScores"].apply(
            len)
        for pt in range(params.partitions):
            X["Partition%dMaxMoveScore" % pt] = my_df["Partition%dMoveScores" %
                                                      pt].apply(safe_max)
            X["Partition%dMinMoveScore" % pt] = my_df["Partition%dMoveScores" %
                                                      pt].apply(safe_min)
            X["Partition%dRangeMoveScore" %
              pt] = X["Partition%dMaxMoveScore" %
                      pt] - X["Partition%dMinMoveScore" % pt]
            X["Partition%dIQRMoveScore" % pt] = my_df["Partition%dMoveScores" %
                                                      pt].apply(safe_iqr)
            X["Partition%dMedianMoveScore" %
              pt] = my_df["Partition%dMoveScores" % pt].apply(safe_median)
            X["Partition%dSTDMoveScore" % pt] = my_df["Partition%dMoveScores" %
                                                      pt].apply(safe_std)
            X["Partition%dMeanMoveScore" %
              pt] = my_df["Partition%dMoveScores" % pt].apply(safe_mean)
            X["Partition%dModeMoveScore" %
              pt] = my_df["Partition%dMoveScores" %
                          pt].apply(safe_mode).apply(lambda x: x[0][0])
            X["Partition%dSumMoveScore" % pt] = my_df["Partition%dMoveScores" %
                                                      pt].apply(np.sum)
            X["Partition%dBlunderCount" % pt] = my_df["Partition%dMoveScores" %
                                                      pt].apply(catch_blunders)

        ## White moves
        X["WhiteMoveScoresPartitionLen"] = my_df[
            "Partition0WhiteMoveScores"].apply(len)
        for pt in range(params.partitions):
            X["Partition%dMaxWhiteMoveScore" %
              pt] = my_df["Partition%dWhiteMoveScores" % pt].apply(safe_max)
            X["Partition%dMinWhiteMoveScore" %
              pt] = my_df["Partition%dWhiteMoveScores" % pt].apply(safe_min)
            X["Partition%dRangeWhiteMoveScore" %
              pt] = X["Partition%dMaxWhiteMoveScore" %
                      pt] - X["Partition%dMinWhiteMoveScore" % pt]
            X["Partition%dIQRWhiteMoveScore" %
              pt] = my_df["Partition%dWhiteMoveScores" % pt].apply(safe_iqr)
            X["Partition%dMedianWhiteMoveScore" %
              pt] = my_df["Partition%dWhiteMoveScores" % pt].apply(safe_median)
            X["Partition%dSTDWhiteMoveScore" %
              pt] = my_df["Partition%dWhiteMoveScores" % pt].apply(safe_std)
            X["Partition%dMeanWhiteMoveScore" %
              pt] = my_df["Partition%dWhiteMoveScores" % pt].apply(safe_mean)
            X["Partition%dModeWhiteMoveScore" %
              pt] = my_df["Partition%dWhiteMoveScores" %
                          pt].apply(safe_mode).apply(lambda x: x[0][0])
            X["Partition%dSumWhiteMoveScore" %
              pt] = my_df["Partition%dWhiteMoveScores" % pt].apply(np.sum)
            X["Partition%dWhiteBlunderCount" %
              pt] = my_df["Partition%dWhiteMoveScores" %
                          pt].apply(catch_blunders)

        ## Black moves
        X["BlackMoveScoresPartitionLen"] = my_df[
            "Partition0BlackMoveScores"].apply(len)
        for pt in range(params.partitions):
            X["Partition%dMaxBlackMoveScore" %
              pt] = my_df["Partition%dBlackMoveScores" % pt].apply(safe_max)
            X["Partition%dMinBlackMoveScore" %
              pt] = my_df["Partition%dBlackMoveScores" % pt].apply(safe_min)
            X["Partition%dRangeBlackMoveScore" %
              pt] = X["Partition%dMaxBlackMoveScore" %
                      pt] - X["Partition%dMinBlackMoveScore" % pt]
            X["Partition%dIQRBlackMoveScore" %
              pt] = my_df["Partition%dBlackMoveScores" % pt].apply(safe_iqr)
            X["Partition%dMedianBlackMoveScore" %
              pt] = my_df["Partition%dBlackMoveScores" % pt].apply(safe_median)
            X["Partition%dSTDBlackMoveScore" %
              pt] = my_df["Partition%dBlackMoveScores" % pt].apply(safe_std)
            X["Partition%dMeanBlackMoveScore" %
              pt] = my_df["Partition%dBlackMoveScores" % pt].apply(safe_mean)
            X["Partition%dModeBlackMoveScore" %
              pt] = my_df["Partition%dBlackMoveScores" %
                          pt].apply(safe_mode).apply(lambda x: x[0][0])
            X["Partition%dSumBlackMoveScore" %
              pt] = my_df["Partition%dBlackMoveScores" % pt].apply(np.sum)
            X["Partition%dBlackBlunderCount" %
              pt] = my_df["Partition%dBlackMoveScores" %
                          pt].apply(catch_blunders)

        ## WhiteAdvantage moves
        X["WhiteAdvantageMoveScoresPartitionLen"] = my_df[
            "Partition0WhiteAdvantageMoveScores"].apply(len)
        for pt in range(params.partitions):
            X["Partition%dMaxWhiteAdvantageMoveScore" %
              pt] = my_df["Partition%dWhiteAdvantageMoveScores" %
                          pt].apply(safe_max)
            X["Partition%dMinWhiteAdvantageMoveScore" %
              pt] = my_df["Partition%dWhiteAdvantageMoveScores" %
                          pt].apply(safe_min)
            X["Partition%dRangeWhiteAdvantageMoveScore" %
              pt] = X["Partition%dMaxWhiteAdvantageMoveScore" %
                      pt] - X["Partition%dMinWhiteAdvantageMoveScore" % pt]
            X["Partition%dIQRWhiteAdvantageMoveScore" %
              pt] = my_df["Partition%dWhiteAdvantageMoveScores" %
                          pt].apply(safe_iqr)
            X["Partition%dMedianWhiteAdvantageMoveScore" %
              pt] = my_df["Partition%dWhiteAdvantageMoveScores" %
                          pt].apply(safe_median)
            X["Partition%dSTDWhiteAdvantageMoveScore" %
              pt] = my_df["Partition%dWhiteAdvantageMoveScores" %
                          pt].apply(safe_std)
            X["Partition%dMeanWhiteAdvantageMoveScore" %
              pt] = my_df["Partition%dWhiteAdvantageMoveScores" %
                          pt].apply(safe_mean)
            X["Partition%dModeWhiteAdvantageMoveScore" %
              pt] = my_df["Partition%dWhiteAdvantageMoveScores" %
                          pt].apply(safe_mode).apply(lambda x: x[0][0])
            X["Partition%dSumWhiteAdvantageMoveScore" %
              pt] = my_df["Partition%dWhiteAdvantageMoveScores" % pt].apply(
                  np.sum)
            X["Partition%dWhiteAdvantageBlunderCount" %
              pt] = my_df["Partition%dWhiteAdvantageMoveScores" %
                          pt].apply(catch_blunders)

        ## BlackAdvantage moves
        X["BlackAdvantageMoveScoresPartitionLen"] = my_df[
            "Partition0BlackAdvantageMoveScores"].apply(len)
        for pt in range(params.partitions):
            X["Partition%dMaxBlackAdvantageMoveScore" %
              pt] = my_df["Partition%dBlackAdvantageMoveScores" %
                          pt].apply(safe_max)
            X["Partition%dMinBlackAdvantageMoveScore" %
              pt] = my_df["Partition%dBlackAdvantageMoveScores" %
                          pt].apply(safe_min)
            X["Partition%dRangeBlackAdvantageMoveScore" %
              pt] = X["Partition%dMaxBlackAdvantageMoveScore" %
                      pt] - X["Partition%dMinBlackAdvantageMoveScore" % pt]
            X["Partition%dIQRBlackAdvantageMoveScore" %
              pt] = my_df["Partition%dBlackAdvantageMoveScores" %
                          pt].apply(safe_iqr)
            X["Partition%dMedianBlackAdvantageMoveScore" %
              pt] = my_df["Partition%dBlackAdvantageMoveScores" %
                          pt].apply(safe_median)
            X["Partition%dSTDBlackAdvantageMoveScore" %
              pt] = my_df["Partition%dBlackAdvantageMoveScores" %
                          pt].apply(safe_std)
            X["Partition%dMeanBlackAdvantageMoveScore" %
              pt] = my_df["Partition%dBlackAdvantageMoveScores" %
                          pt].apply(safe_mean)
            X["Partition%dModeBlackAdvantageMoveScore" %
              pt] = my_df["Partition%dBlackAdvantageMoveScores" %
                          pt].apply(safe_mode).apply(lambda x: x[0][0])
            X["Partition%dSumBlackAdvantageMoveScore" %
              pt] = my_df["Partition%dBlackAdvantageMoveScores" % pt].apply(
                  np.sum)
            X["Partition%dBlackAdvantageBlunderCount" %
              pt] = my_df["Partition%dBlackAdvantageMoveScores" %
                          pt].apply(catch_blunders)

        if type == "train":
            y_white = my_df["WhiteEloScore"].apply(int)
            y_black = my_df["BlackEloScore"].apply(int)
            dict = {}
            dict["X"] = X
            dict["y_white"] = y_white
            dict["y_black"] = y_black
            hkl.dump(
                dict,
                str(os.path.join(params.cache_dir, "train_features.hkl")))
            return X, y_white, y_black
        elif type == "test":
            dict = {}
            dict["X"] = X
            hkl.dump(
                dict,
                str(os.path.join(params.cache_dir, "test_features.hkl")))
            return X
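# Note: the safe_* reductions and catch_blunders used throughout process_features
# are referenced but not defined in this excerpt. A minimal sketch of what they
# presumably do, guarding each statistic against empty move-score lists, follows;
# the fallback values and the blunder threshold are assumptions, not taken from
# the original.
import numpy as np
from scipy.stats import mode, iqr

def safe_max(x):
    # fall back to 0 when the slice contains no moves
    return np.max(x) if len(x) else 0

def safe_min(x):
    return np.min(x) if len(x) else 0

def safe_mean(x):
    return np.mean(x) if len(x) else 0.0

def safe_median(x):
    return np.median(x) if len(x) else 0.0

def safe_std(x):
    return np.std(x) if len(x) else 0.0

def safe_iqr(x):
    return iqr(x) if len(x) else 0.0

def safe_mode(x):
    # callers index the result with [0][0], so return a ModeResult as scipy.stats.mode does
    return mode(x) if len(x) else mode([0])

def catch_blunders(x, threshold=200):
    # hypothetical definition: count swings of more than `threshold` centipawns between moves
    return int(np.sum(np.abs(np.diff(x)) > threshold)) if len(x) > 1 else 0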
Example #52
0
import pandas as pd
from pandas import DataFrame as df
from pandas import Series as s
import numpy as np

# 4x4 array of the integers 1..16
int_ary = np.arange(1, 17).reshape(4, 4)

# mask every multiple of 3
masked = np.ma.masked_array(int_ary, mask=int_ary % 3 == 0)
print(masked)

# wrapping the masked array in a DataFrame converts the masked cells to NaN
masked = df(masked)
print(masked)
Example #53
0
    def orderbook(self):
        depth = self.session.get("%s/public/orderbook/%s" % (self.url, self.symbol),
                                 timeout=2).json()
        asks = np.array(df(depth["ask"])).astype(float)[:20]
        bids = np.array(df(depth["bid"])).astype(float)[:20]
        return {"ask": asks, "bid": bids}
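# Note: the method above is shown without its enclosing class; self.session,
# self.url and self.symbol are assumed attributes. A hedged sketch of the client
# it might belong to follows; the base URL and symbol are placeholders, not taken
# from the original.
import requests

class OrderbookClient:
    def __init__(self, url, symbol):
        self.session = requests.Session()  # reused HTTP connection for repeated polling
        self.url = url                     # exchange REST base URL (placeholder)
        self.symbol = symbol               # trading-pair identifier (placeholder)
    # the orderbook() method above would be defined on this class

# book = OrderbookClient("https://exchange.example/api", "BTCUSD").orderbook()
# book["ask"] and book["bid"] would then be float arrays of the top 20 levels per side.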
Example #54
0
def read_DGS(filename):
    """
    Read a DIgSILENT PowerFactory .dgs file and return its contents.
    Args:
        filename: File name or path

    Returns: Tuple (data, pos_dict) where data maps each object type to a
             DataFrame of its records and pos_dict maps object ids to their
             (x, y) graphical positions
    """
    ###############################################################################
    # Read the file
    ###############################################################################
    f = open(filename, errors='replace')
    lines = f.readlines()
    f.close()

    ###############################################################################
    # Process the data
    ###############################################################################
    data = dict()
    """
    Numpy types:

    'b' 	boolean
    'i' 	(signed) integer
    'u' 	unsigned integer
    'f' 	floating-point
    'c' 	complex-floating point
    'O' 	(Python) objects
    'S', 'a' 	(byte-)string
    'U' 	Unicode
    'V' 	raw data (void)
    """
    """
    DGS types

    a
    p
    i
    r

    """
    types_dict = dict()
    types_dict["a"] = "|S32"
    types_dict["p"] = "|S32"
    types_dict["i"] = "<i4"
    types_dict["r"] = "<f4"
    types_dict["d"] = "<f4"

    types_dict2 = dict()

    current_type = None
    data_types = None
    header = None

    Headers = dict()
    # parse the file lines
    for line in lines:

        if line.startswith("$$"):
            line = line[2:]
            chnks = line.split(";")
            current_type = chnks[0]
            data[current_type] = list()
            print(current_type)

            # analyze types
            data_types = list()
            header = list()
            for i in range(1, len(chnks)):
                token = chnks[i].split("(")
                name = token[0]
                tpe = token[1][:-1]
                data_types.append((name, types_dict[tpe[0]]))
                header.append(name)

            types_dict2[current_type] = data_types

            Headers[current_type] = header

        elif line.startswith("*"):
            pass

        elif line.startswith("  "):
            if current_type is not None:
                line = line.strip()
                chnks = line.split(";")
                chnks = ["0" if x == "" else x for x in chnks]
                data[current_type].append(array(tuple(chnks)))

    # format keys
    for key in data.keys():
        print("Converting " + str(key))
        table = array([tuple(x) for x in data[key]], dtype=types_dict2[key])
        table = array([list(x) for x in table], dtype=object)
        header = Headers[key]
        data[key] = df(data=table, columns=header)

    # positions dictionary
    obj_id = data['IntGrf']['pDataObj'].values
    x_vec = data['IntGrf']['rCenterX'].values
    y_vec = data['IntGrf']['rCenterY'].values
    pos_dict = dict()
    for i in range(len(obj_id)):
        pos_dict[obj_id[i]] = (x_vec[i], y_vec[i])

    return data, pos_dict
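# Note: a hedged usage sketch for read_DGS; the path is a placeholder, and the
# function also relies on module-level imports (numpy's array, numpy as np and
# pandas.DataFrame as df) that this excerpt does not show.
data, pos_dict = read_DGS("my_grid.dgs")   # 'my_grid.dgs' is a hypothetical file name
print(list(data.keys()))                   # one DataFrame per DGS object type
print(data["IntGrf"].head())               # the graphical records used to build pos_dict
print(pos_dict)                            # {object id: (x, y) centre position}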
Example #55
0
    def __init__(self,
                 col_list="default",
                 attr_list="default",
                 categorize=None,
                 data_opt='train',
                 scaling='mean-std',
                 holdout=0.3,
                 random_seed=42,
                 oversampler=None):
        self.col_list = col_list
        self.attr_list = attr_list

        if (col_list == 'default'):
            self.col_list = [
                'ADMISSIONS', 'ICUSTAYS', 'INPUTEVENTS_MV', 'PATIENTS'
            ]
        if (attr_list == 'default'):
            self.attr_list = {
                'ADMISSIONS':
                ['ADMISSION_TYPE', 'ADMITTIME', 'DISCHTIME', 'DEATHTIME'],
                'ICUSTAYS': ['LOS'],
                'INPUTEVENTS_MV': ['PATIENTWEIGHT'],
                'PATIENTS': ['DOB', 'GENDER']
            }

        for y in ['ADMITTIME', 'DISCHTIME', 'DEATHTIME']:
            if y not in self.attr_list['ADMISSIONS']:
                self.attr_list['ADMISSIONS'].append(y)

        self.datasetX = None
        self.datasetY = None

        col_list = self.col_list
        conn = pymysql.connect(host='192.168.56.104',
                               user='******',
                               password='******',
                               db='mimiciiiv14',
                               charset='utf8')
        curs = conn.cursor(pymysql.cursors.DictCursor)
        # Select LABEVENTS,SUBJECT_ID FROM LABEVENTS JOIN PATIENTS on LABEVENTS.SUBJECT_ID = PATIENTS.SUBJECT_ID JOIN ADMISSIONS on PATIENTS.SUBJECT_ID = ADMISSIONS.SUBJECT_ID
        sql_line = 'SELECT'
        for col in col_list:
            for attr in self.attr_list[col]:
                sql_line += ' ,' + col + '.' + attr
        sql_line += ' FROM ' + col_list[0]
        sql_line = sql_line[:7] + sql_line[8:]

        prev = col_list[0]
        for col in col_list[1:]:
            if col != 'PATIENTS':
                sql_line += ' JOIN {0} on {1}.SUBJECT_ID = {0}.SUBJECT_ID and {1}.HADM_ID = {0}.HADM_ID'.format(
                    col, prev)
            else:
                sql_line += ' JOIN {0} on {1}.SUBJECT_ID = {0}.SUBJECT_ID'.format(
                    col, prev)
            prev = col  # advance the join anchor to the table just joined
        sql_line += ';'
        curs.execute(sql_line)
        result = curs.fetchall()
        print(df(result))
        # from here
        self.datasetX = df(result)
        self.datasetY = self.datasetX[['ADMITTIME', 'DISCHTIME', 'DEATHTIME']]
        self.datasetX = self.datasetX.drop(
            ['ADMITTIME', 'DISCHTIME', 'DEATHTIME'], axis=1)

        self.datasetX = changeValue(self.datasetX)  #.to_numpy()
        for i in self.datasetX.columns:
            if self.datasetX[i].dtype == object:
                self.datasetX = pd.concat([
                    self.datasetX,
                    pd.get_dummies(self.datasetX[i], prefix=i)
                ],
                                          axis=1)
                del (self.datasetX[i])

        if isinstance(categorize, list):
            for i in categorize:
                self.datasetX = pd.concat([
                    self.datasetX,
                    pd.get_dummies(self.datasetX[i], prefix=i)
                ],
                                          axis=1)
                del (self.datasetX[i])
        print(self.datasetX.shape)
        print(self.datasetX)
        self.datasetY = cal_days(self.datasetY)
        self.datasetY = self.datasetY.fillna(self.datasetY.mean())
        self.datasetY = self.datasetY.to_numpy()
        # everything up to here needs to be reworked.

        X_train, X_test, y_train, y_test = train_test_split(
            self.datasetX,
            self.datasetY,
            test_size=holdout,
            random_state=random_seed)
        if (scaling == 'mean-std'):
            std_scaler = StandardScaler()
            X_train = std_scaler.fit_transform(X_train)
            X_test = std_scaler.transform(X_test)
        if (scaling == 'min-max'):
            scaler = MinMaxScaler()
            scaler.fit(X_train)
            X_train = scaler.transform(X_train)
            X_test = scaler.transform(X_test)

        if (data_opt == 'train'):
            self.X = torch.from_numpy(X_train)
            self.y = torch.from_numpy(y_train)
            if (oversampler == 'Random'):
                ros = RandomOverSampler(random_state=random_seed)
                self.X, self.y = ros.fit_resample(self.X, self.y)

            if (oversampler == 'ADASYN'):
                self.X, self.y = ADASYN(random_state=random_seed).fit_resample(
                    self.X, self.y)

            if (oversampler == 'SMOTE'):
                self.X, self.y = SMOTE(random_state=random_seed).fit_resample(
                    self.X, self.y)
        else:
            self.X = torch.from_numpy(X_test)
            self.y = torch.from_numpy(y_test)
        self.length = self.X.shape[0]
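# Note: this __init__ reads like part of a PyTorch-style dataset class whose name,
# __len__ and __getitem__ are not shown. Assuming that is the case, and calling the
# class MimicDataset purely for illustration, it might be consumed like this:
from torch.utils.data import DataLoader

train_set = MimicDataset(data_opt='train', scaling='mean-std', oversampler=None)
loader = DataLoader(train_set, batch_size=64, shuffle=True)
for X_batch, y_batch in loader:
    pass  # feed batches to a model here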
Example #56
0
    def __init__(self):
        super(MyUi, self).__init__()
        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)
        cwd = os.getcwd()
        cwd = str(cwd)
        if os.path.isfile(cwd + "/time"):
            with open("time", "r") as infile:  # read the previously saved timestamp
                history = cPickle.load(infile)
            if (datetime.now() - history
                ).total_seconds() < 43200:  # less than 12 hours have elapsed
                print("Less than 12 hours. Loading previously saved Pickle...")
            else:
                print("More than 12 hours. Updating Pickle...")
                data = ts.get_industry_classified()
                with open("class", "w+") as outfile:
                    cPickle.dump(data, outfile)
                now = datetime.now()
                with open("time", "w+") as outfile:  # update time
                    cPickle.dump(now, outfile)

        else:
            print("No Pickle found!")  # first time using tuchart in this directory
            data = df()
            data = ts.get_industry_classified()
            with open('class', 'w+') as outfile:  #records pickle
                cPickle.dump(data, outfile)
            now = datetime.now()
            with open("time", "w+") as outfile:
                cPickle.dump(now, outfile)

        with open("class", "r") as infile:  # reads current time
            series = cPickle.load(infile)
        #series = pd.read_json(cwd + "\\class.json")
        #series = ts.get_industry_classified()
        series = pd.DataFrame(series)

        curdate = time.strftime(
            "%Y/%m/%d")  # gets current time to put into dateedit
        curdateQ = QDate.fromString(curdate, "yyyy/MM/dd")

        dateobj = datetime.strptime(curdate,
                                    "%Y/%m/%d")  #converts to datetime object

        past = dateobj - timedelta(days=7)  #minus a week to start date
        pasttime = datetime.strftime(past, "%Y/%m/%d")
        pastQ = QDate.fromString(
            pasttime,
            "yyyy/MM/dd")  #convert to qtime so that widget accepts the values

        pastL = dateobj - timedelta(days=30)  # minus a month to start date
        pasttimeL = datetime.strftime(pastL, "%Y/%m/%d")
        pastQL = QDate.fromString(pasttimeL, "yyyy/MM/dd")

        np_indexes = np.array([['sh', '上证指数', '大盘指数'], ['sz', '深证成指', '大盘指数'],
                               ['hs300', '沪深300指数', '大盘指数'],
                               ['sz50', '上证50', '大盘指数'],
                               ['zxb', '中小板', '大盘指数'], ['cyb', '创业板', '大盘指数']])
        indexes = df(data=np_indexes,
                     index=range(5000, 5006),
                     columns=["code", "name", "c_name"])
        series = indexes.append(series)
        list1_bfr = series["c_name"].tolist()  # industry categories; duplicates filtered out below
        list1 = list(set(list1_bfr))
        list1.sort(key=list1_bfr.index)
        #w = database()
        #zsparent = QTreeWidgetItem(self.ui.treeWidget)
        #zsparent.setText(0,"股票指数")
        #zsnames =["上证指数-sh","深圳成指-sz","沪深300指数-hs300","上证50-"]

        self.init_treeWidget(list1, series)

        self.ui.treeWidget.setContextMenuPolicy(Qt.CustomContextMenu)
        self.ui.treeWidget.customContextMenuRequested.connect(self.openMenu)

        #self.ui.webView.setGeometry(QtCore.QRect(0, 30,1550, 861))
        file_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__),
                         "render.html"))  #path to read html file
        local_url = QUrl.fromLocalFile(file_path)
        self.ui.webView.load(local_url)
        #self.ui.commandLinkButton.setFixedSize(50, 50)
        self.ui.search_btn.clicked.connect(lambda: self.search_comp(series))
        self.ui.log_btn.clicked.connect(lambda: self.log())
        self.ui.init_code_btn.clicked.connect(
            lambda: self.code_sort_tree(series))
        self.ui.init_category_btn.clicked.connect(
            lambda: self.init_treeWidget(list1, series))

        self.ui.commandLinkButton.clicked.connect(
            self.classify)  #when the arrow button is clicked, trigger events

        #self.ui.commandLinkButton.clicked.connect(lambda action: self.classify(action, self.ui.treewidget))
        #  QSizePolicy
        try:
            retain_size = self.ui.dateEdit_2.sizePolicy()
            retain_size.setRetainSizeWhenHidden(True)
            self.ui.dateEdit_2.setSizePolicy(retain_size)
            retain_size = self.ui.comboBox.sizePolicy()
            retain_size.setRetainSizeWhenHidden(True)
            #            self.ui.comboBox.setSizePolicy(retain_size)
            retain_size = self.ui.label_2.sizePolicy()
            retain_size.setRetainSizeWhenHidden(True)
            self.ui.label_2.setSizePolicy(retain_size)
        except AttributeError:
            print("No PYQT5 Binding! Widgets might be deformed")
        self.ui.dateEdit.setDate(pastQL)
        self.ui.dateEdit_2.setDate(curdateQ)  #populate widgets
        self.ui.dateEdit.setCalendarPopup(True)
        self.ui.dateEdit_2.setCalendarPopup(True)
        self.ui.comboBox.addItems(["D"])
        self.ui.treeWidget_2.setDragDropMode(self.ui.treeWidget_2.InternalMove)
        self.ui.treeWidget_2.setContextMenuPolicy(Qt.CustomContextMenu)
        self.ui.treeWidget_2.customContextMenuRequested.connect(
            self.openWidgetMenu)
        #self.ui.toolbutton.clicked.connect(lambda action: self.graphmerge(action, CombineKeyword))
        self.ui.combobox.currentIndexChanged.connect(
            lambda: self.modifycombo(pastQL, pastQ))
Example #57
0
    if row.Type == 'Sell':
        last_cpl = row[4]  # 'Cumulative P/L' column
        if isnan(last_cpl):
            break
        rowdict['Position Id'] = int(posid)
        rowdict['Date/Time'] = row[3]  # 'Date/Time' column
        rowdict['Order Type'] = row.Type
        rowdict['Profit/Loss'] = trade_pl
        rowdict['Cumulative P/L'] = last_cpl

    rows.append(rowdict)

# create an empty equity-curve DataFrame
#ecols = ['Position Id', 'Date/Time', 'Order Type',
#         'Profit/Loss', 'Cumulative P/L']
edf = df(rows)

#fig = px.line(edf, x='Date/Time', y='Cumulative P/L', title="Equity Curve");
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Candlestick(x=cdf['Date/Time'],
                             open=cdf['Open'],
                             high=cdf['High'],
                             low=cdf['Low'],
                             close=cdf['Close']),
              secondary_y=False)

fig.add_trace(go.Scatter(x=edf['Date/Time'], y=edf['Cumulative P/L']),
              secondary_y=True)

#fig.update_xaxes(rangebreaks=[dict(bounds=["sat", "mon"])])
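# Note: the fragment builds the combined candlestick and equity-curve figure but
# never renders it; the assumed continuation (not part of the original) would be:
fig.show()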
Example #58
0
import numpy as np
from pandas import read_csv, DataFrame as df

from pnn import PNN

if __name__ == '__main__':
    np.random.seed(42)
    try:
        train_set = np.array(read_csv("data/input/KDDTrain_procsd_redcd.csv"))
        test_set = np.array(read_csv("data/input/KDDTest_procsd.csv"))

        n = int(train_set.shape[1] - 1)

        input("Press 'Enter' to start")

        train_set_in = train_set[:, 0:n]
        train_set_out = train_set[:, n]

        test_set_in = test_set[:, 0:n]
        test_set_out = test_set[:, n]

        pnn = PNN(train_set_in, train_set_out)

        print("\tRECOGNITION  (testing set)")
        test_recog = pnn.recognize(test_set_in, test_set_out).squeeze()

        testDF = df({"expected": test_set_out, "recognized": test_recog})
        testDF.to_csv("data/output/KDD_testing_pnn.csv")

    except Exception as e:
        print("Exception occurred:\n{}".format(e))
    finally:
        input("Press 'Enter' to quit")
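# Note: as a hedged follow-up (not part of the original script), the saved
# expected/recognized comparison could be scored with a simple accuracy metric:
import pandas as pd

results = pd.read_csv("data/output/KDD_testing_pnn.csv")
accuracy = (results["expected"] == results["recognized"]).mean()
print("PNN test accuracy: {:.3f}".format(accuracy))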
Example #59
0
import requests
from pandas import DataFrame as df
import plotly.graph_objects as go

# Hit the API
r = requests.get('https://coronavirus-tracker-api.herokuapp.com/v2/locations')

# Pull the locations list into a DataFrame
r = df(r.json()['locations'])

lon = []
lat = []

for x in r['coordinates']:
    lon.append(x['longitude'])
    lat.append(x['latitude'])

r['lat'] = df(lat)
r['lon'] = df(lon)

confirmed = []
confirmed_size = []
deaths = []
deaths_size = []
recovered =[]
recovered_size = []

for x in  r['latest']:
    confirmed.append(x['confirmed'])
    confirmed_size.append(int(x['confirmed'])/700) 
    deaths.append(x['deaths'])
Example #60
0
def PrintData(x):
    print(df(x))
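# Note: a trivial usage sketch (the inputs are made up); PrintData accepts anything
# the DataFrame constructor accepts.
PrintData({"a": [1, 2, 3], "b": [4, 5, 6]})  # prints a 3x2 DataFrame
PrintData([[1, 2], [3, 4]])                  # prints a 2x2 DataFrame with default integer labels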