Example #1
 def params_output(self):
     """Restore the latest checkpoint and write every model variable to its own CSV file."""
     tf.reset_default_graph()
     self.creat_model()
     model_path = tf.train.latest_checkpoint(self.path_data + 'model_save\\')
     saver = tf.train.Saver()
     with tf.Session() as sess:
         # Restore the model parameters from the checkpoint.
         saver.restore(sess, model_path)
         # Create the folder for the exported parameters if it does not exist yet.
         if not os.path.exists(self.path_data + 'model_paras\\'):
             os.makedirs(self.path_data + 'model_paras\\')
         # Iterate over the model variables and save each one to a CSV file.
         for var in tf.global_variables():
             print(var)
             var_value = sess.run(var)
             var_name = var.name.replace(':', '_').replace('/', '_')
             pd.DataFrame(var_value).to_csv(
                 self.path_data + u'model_paras\\{}.csv'.format(var_name),
                 sep=',',
                 encoding='utf_8_sig')
     return
Example #2
def import_file_to_database(conn):
    df = pd.read_csv('owid-covid-data.csv')
    # Continent/country code lookup table (loaded here but not used further in this snippet).
    data = pd.read_csv('country-and-continent-codes-list-csv_csv.csv')

    df = df.replace(np.nan, 0)
    df = df.set_index('iso_code')
    df = df.drop(0)
    convert_data_types(df)
    delete_columns_from_dataframe(df)
    df.to_sql('DailyCOVID192020', conn, if_exists='replace', index=True)
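For orientation, a minimal usage sketch follows; the sqlite3 file name is made up, and it assumes the two CSV files plus the convert_data_types and delete_columns_from_dataframe helpers are available in the same module.

import sqlite3

import pandas as pd

conn = sqlite3.connect('covid.db')  # hypothetical database file
import_file_to_database(conn)
print(pd.read_sql_query('SELECT COUNT(*) AS n FROM DailyCOVID192020', conn))
conn.close()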
Example #3
def OHLC_resample(dataframe, timeframe, column='ask'):

    # =============================================== DESCRIPTION ======================================================

    # OHLC resampler to turn raw tick data into candlestick data if required, and to switch timeframes, e.g. from 1M to 1H.

    # param (1) - dataframe: dataframe containing the data we want to resample.
    # param (2) - timeframe: timeframe that we want to resample to.
    # param (3) - column: which column we are resampling (bid or ask), default set to ask.
    # return (1) - resampled OHLC data for the given timeframe.

    grouped = dataframe.groupby('Symbol')

    # The block below turns raw tick data into candlestick format.

    if np.any(dataframe.columns == 'Ask'):

        if column == 'ask':
            ask = grouped['Ask'].resample(timeframe).ohlc()
            ask_vol = grouped['Ask_Vol'].resample(timeframe).count()
            resampled = pd.DataFrame(ask)
            resampled['Ask_Vol'] = ask_vol

        elif column == 'bid':
            bid = grouped['Bid'].resample(timeframe).ohlc()
            bid_vol = grouped['Bid_Vol'].resample(timeframe).count()
            resampled = pd.DataFrame(bid)
            resampled['Bid_Vol'] = bid_vol

        else:
            raise ValueError("Column must be a string, either 'ask' or 'bid'.")

    # Resample data that is already in candlestick (OHLC) format to a higher time period,
    # using first/max/min/last so the aggregation preserves the OHLC semantics.

    elif np.any(dataframe.columns == 'CLOSE'):
        resampled = pd.DataFrame()
        resampled['OPEN'] = grouped['OPEN'].resample(timeframe).first()
        resampled['HIGH'] = grouped['HIGH'].resample(timeframe).max()
        resampled['LOW'] = grouped['LOW'].resample(timeframe).min()
        resampled['CLOSE'] = grouped['CLOSE'].resample(timeframe).last()
        resampled['Volume'] = grouped['Volume'].resample(timeframe).sum()

    resampled = resampled.dropna()

    return resampled
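A rough usage sketch follows, assuming tick data indexed by timestamp; the symbol, prices, volumes and column names ('Symbol', 'Ask', 'Ask_Vol') are synthetic stand-ins.

import numpy as np
import pandas as pd

idx = pd.date_range('2021-01-04 09:00', periods=240, freq='s')
ticks = pd.DataFrame({
    'Symbol': 'EURUSD',
    'Ask': 1.2250 + np.cumsum(np.random.normal(0, 1e-5, len(idx))),
    'Ask_Vol': 1,
}, index=idx)

candles = OHLC_resample(ticks, '1Min', column='ask')
print(candles.head())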
Example #4
def freqPrcnt(dsn, var):
    """Return the percentage frequency of each level of `var` in `dsn`."""
    dataset = pd.DataFrame(dsn)
    # value_counts(normalize=True) plays the role of R's prop.table(table(...)).
    uniq_prcnt = round(dataset[var].value_counts(normalize=True) * 100, 1)
    prcnt = dict()
    for i, pct in enumerate(uniq_prcnt, start=1):
        prcnt[i] = pct
    return prcnt
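A quick sketch of the kind of output this returns, using a made-up column:

import pandas as pd

sample = pd.DataFrame({'colour': ['red', 'red', 'blue', 'green', 'red']})
print(freqPrcnt(sample, 'colour'))  # e.g. {1: 60.0, 2: 20.0, 3: 20.0}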
Example #5
    def remap(self, df):
        '''
        Remap the columns of input data to Chirp-formatted output data.

        If multiple columns map to the same Chirp column, they will be concatenated into that column.

        Args:
            df (pd.DataFrame)

        Returns:
            pd.DataFrame
        '''
        # For every Chirp column, collect the list of source columns that feed it.
        out_columns = {}
        for k in df.keys():
            if k in self.mapping:
                chirp_field = self.mapping[k]
                if chirp_field is not None:
                    out_columns[chirp_field] = self._array_new_append(
                        k, out_columns.get(chirp_field))

        # Build the output frame; source columns that share a target are concatenated
        # as space-separated strings (one simple way to combine them).
        out = pd.DataFrame()
        for chirp_field, sources in out_columns.items():
            combined = df[sources[0]].astype(str)
            for extra in sources[1:]:
                combined = combined + ' ' + df[extra].astype(str)
            out[chirp_field] = combined

        return out

    def _array_new_append(self, v, a):
        ''' Add value v to array a, or create a new array. '''
        if a is None:
            return [v]
        a.append(v)
        return a
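As a rough illustration of the behaviour the docstring describes (the mapping and column names below are made up, not part of the original code):

import pandas as pd

mapping = {'freq_mhz': 'Frequency', 'chan_name': 'Name', 'chan_desc': 'Name'}
src = pd.DataFrame({'freq_mhz': [446.00625], 'chan_name': ['PMR1'], 'chan_desc': ['License-free']})

# 'chan_name' and 'chan_desc' both map to 'Name', so their values end up concatenated.
out = pd.DataFrame()
out['Frequency'] = src['freq_mhz']
out['Name'] = src['chan_name'].astype(str) + ' ' + src['chan_desc'].astype(str)
print(out)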
Example #6
def to_df(X, Y, qualitatif=False):
    """Transform X and Y into a single pandas DataFrame."""
    df = pd.DataFrame(np.c_[X, Y])
    df.columns = [
        'elevation', 'aspect', 'slope', 'horizontal_distance_to_hydrology',
        'vertical_distance_to_hydrology', 'horizontal_distance_to_roadways',
        'hillshade_9am', 'hillshade_noon', 'hillshade_3pm',
        'horizontal_distance_to_fire_points', 'wilderness_area1',
        'wilderness_area2', 'wilderness_area3', 'wilderness_area4',
        'soil_type1', 'soil_type2', 'soil_type3', 'soil_type4', 'soil_type5',
        'soil_type6', 'soil_type7', 'soil_type8', 'soil_type9', 'soil_type10',
        'soil_type11', 'soil_type12', 'soil_type13', 'soil_type14',
        'soil_type15', 'soil_type16', 'soil_type17', 'soil_type18',
        'soil_type19', 'soil_type20', 'soil_type21', 'soil_type22',
        'soil_type23', 'soil_type24', 'soil_type25', 'soil_type26',
        'soil_type27', 'soil_type28', 'soil_type29', 'soil_type30',
        'soil_type31', 'soil_type32', 'soil_type33', 'soil_type34',
        'soil_type35', 'soil_type36', 'soil_type37', 'soil_type38',
        'soil_type39', 'soil_type40', 'cover_type'
    ]
    if qualitatif:
        # TODO: maybe select by column name rather than by index, and do the same for the wilderness columns.
        # Collapse all soil_type* indicator columns into a single 'soil' column.
        df['soil'] = None
        idx_soil_debut = 14
        idx_soil_fin = 54
        for i in range(idx_soil_debut, idx_soil_fin):
            l_idx_bool = df.iloc[:, i].astype(bool)
            if l_idx_bool.any():
                df.loc[l_idx_bool, 'soil'] = df.columns[i]
        df.drop(df.columns[idx_soil_debut:idx_soil_fin], axis=1, inplace=True)

    return df
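A minimal sketch of calling it, with random values just to show the resulting shape and column names (the data is made up):

import numpy as np

X = np.random.rand(5, 54)
Y = np.random.randint(1, 8, size=(5, 1))
df = to_df(X, Y)
print(df.shape)                 # (5, 55)
print(df.columns[:3].tolist())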
Example #7
def generate_freq(df):
    '''Takes an input data frame and generates a histogram of the number of tweets, binned by month.

    Inputs: pandas data frame imported from a *.csv or *.pkl file
    Outputs: histogram (bar chart of tweet counts per month)
    '''

    date_bounds = pd.Series(df['tweet_time'].tolist())
    date_bounds_ym = (date_bounds.dt.strftime('%Y-%m')).tolist()
    df['date_md'] = np.array(date_bounds_ym)

    sort = df.sort_values(by=['date_md'])

    frq = sort['date_md'].value_counts().to_dict()
    frq_df = sort['date_md'].value_counts()

    od = collections.OrderedDict(sorted(frq.items()))
    rf_dates = list()

    for item in list(od.keys()):
        date_rf = date_reformat(item)
        rf_dates.append(date_rf)

    data = {"Date": rf_dates, "Freq": list(od.values())}
    graph_frame = pd.DataFrame(data=data)
    frq_df.to_csv(os.path.join(path, 'tweet_freq_' + today + '.csv'))

    ax = graph_frame.plot.bar(x="Date", y="Freq", rot=45)

    plt.show()
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--img_url', type=str, required=True, help='input url')
    parser.add_argument(
        '--output_dir',
        type=str,
        default='results/',
        help='output directory to save the results (default: results/)')
    parser.add_argument('--model',
                        type=str,
                        default='face_model.pkl',
                        help='path to trained model (default: face_model.pkl)')

    args = parser.parse_args()
    output_dir = args.output_dir
    image_url = args.img_url
    model_path = args.model

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)
    # load the model
    with open(model_path, 'rb') as f:
        clf, labels = pickle.load(f, encoding="latin1")
    pred, locs = predict_one_image(image_url, clf, labels, True)
    if not locs:
        print("No people found!")
        return
    locs = pd.DataFrame(locs, columns=['top', 'right', 'bottom', 'left'])
    df = pd.concat([pred, locs], axis=1)
    img = draw_attributes(image_url, df)
    cv2.imwrite(os.path.join(output_dir, "image.jpg"), img)
    output_csvpath = os.path.join(output_dir, 'image.csv')
    df.to_csv(output_csvpath, index=False)
Example #9
 def fetch_file(self):
     try:
         if not os.path.exists('../data'):
             os.makedirs('../data')
     except OSError:
         print('Please place this script in the correct directory.')
         exit()
     self.dataset = pd.DataFrame()
Example #10
def main():
    summary_data = get_summary_data()
    capital_data = summary_data['assets_under_management_B']
    # Weight of each entry relative to total assets under management.
    weight = pd.DataFrame(capital_data / capital_data.sum())
    weight.to_csv(
        r"C:\My Files\Study\17 Spring\800 - Special Problems in FE (MS)\Code\FE-800\csv\weight.csv"
    )
    print(summary_data)
Example #11
 def fitModel(self, X, y, X_test, n):
     """
     Optimise the stacked model: X and y are the training data for the base models,
     n is the split ratio for the validation set that the final meta-model is trained on,
     and X_test is the testing (or real-time) data.
     """
     self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(
         X, y, test_size=n, random_state=42)
     self.df_val = pd.DataFrame(self.X_val)
     self.df_test = pd.DataFrame(X_test)
     for model in self.base_models_:
         model.fit(self.X_train, self.y_train)
         # Out-of-fold predictions on the validation set become new features.
         self.val_pred = pd.Series(model.predict(self.X_val))
         self.df_val = pd.concat([self.df_val, self.val_pred], axis=1)
         # Matching predictions on the test (or real-time) data.
         self.val_test = pd.Series(model.predict(X_test))
         self.df_test = pd.concat([self.df_test, self.val_test], axis=1)
     # Train the meta-model on the validation-set predictions used as new features.
     self.meta_model_.fit(self.df_val, self.y_val)
Example #12
def IterateCheck_UR_Stable(row):
    # Takes a row from the dataframe of unique equilibria and perturbs the equilibrium values.
    new_u_vals_mat = np.array(uvec).reshape(n, 3) - np.transpose(
        [du1vec, du2vec, dbuvec])

    newdf = pd.DataFrame(data=new_u_vals_mat, columns=['u1', 'u2', 'bu'])
Example #13
def filter(method='linear', df=None, fs=1000, cutoff=50, order=2):
    # Takes a dataframe, a sampling frequency (1 kHz by default), a cutoff frequency, and a filter order,
    # and applies a low-pass Butterworth filter to the acceleration data.
    df["ax"] = df["ax"].interpolate(method=method, limit_direction='forward')
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    data = df["ax"].to_numpy()
    y = lfilter(b, a, data)
    data_out = pd.DataFrame(y)
    return data_out
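A rough usage sketch, assuming butter and lfilter come from scipy.signal and that the frame has the 'ax' acceleration column the function expects (the signal below is synthetic):

import numpy as np
import pandas as pd
from scipy.signal import butter, lfilter

t = np.linspace(0, 1, 1000, endpoint=False)
raw = pd.DataFrame({'ax': np.sin(2 * np.pi * 5 * t) + 0.3 * np.random.randn(1000)})
smoothed = filter(method='linear', df=raw, fs=1000, cutoff=50, order=2)
print(smoothed.head())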
Example #14
def get_korean_symbol_dataframe():
    df_korSym = pd.DataFrame()
    df_korSym['Hangul'] = META_TAGS + HANGULS
    df_korSym['Symbol'] = META_TAGS + SYMBOLS

    return df_korSym
Example #15
def states_game():
    right_tries = 0
    wrong_tries = 0
    tammy.penup()
    screen.tracer(0)

    tammy.goto(100, 280)
    tammy.hideturtle()
    tammy.write(f"Right tries{right_tries}/50",
                align="center",
                font=("Courier", 20, "normal"))
    tim.goto(100, 260)
    tim.write(f"Wrong tries{wrong_tries}",
              align="center",
              font=("Courier", 20, "normal"))
    guessed_states = []

    while len(guessed_states) < 50:
        answer_state = screen.textinput(
            title="Guess the state",
            prompt="Whats another state name ?").title()

        if answer_state == "Exit":
            missing_states = []
            for state in list_states:
                if state not in guessed_states:
                    missing_states.append(state)
            new_data = pd.DataFrame(missing_states)
            new_data.to_csv("states_to_learn.csv")

        for dic in states_dict:
            for key in dic:
                if dic[key] == answer_state:

                    right_tries += 1
                    tammy.clear()
                    tammy.hideturtle()
                    tammy.write(f"Right tries{right_tries}/50",
                                align="right",
                                font=("Courier", 20, "normal"))

                    guessed_states.append(dic[key])
                    x = dic['x']
                    y = dic['y']
                    timmy.hideturtle()
                    timmy.penup()
                    timmy.goto(x, y)
                    timmy.write(answer_state, align='center', font=FONT)
                elif answer_state not in list_states and answer_state != "Exit":
                    wrong_tries += 1
                    tim.clear()
                    tim.write(f"Wrong tries{wrong_tries}",
                              align="left",
                              font=("Courier", 20, "normal"))
Example #16
def tree2OTU_table(mvp_tree):
    """ Get an OTU table from an mvp_tree. """
    series = []
    for terminal in mvp_tree.feature_tree.get_terminals():
        try:
            series.append(terminal.sample_series)
        except AttributeError:
            print('there is no sample_series on this terminal in tree2OTU_table')
    df = pd.DataFrame(series)
    return df
Example #17
	def __init__(self, symbol, instrument_type, margin_ratio=None, point_value=None):
		'''Init an instrument, either stock or future.
		For a stock, the margin_ratio defaults to 1.
		For a future, you need to give margin_ratio and point_value.'''
		self.symbol = symbol
		self.margin_ratio = 1 if margin_ratio is None else margin_ratio
		self.instrument_type = instrument_type
		if self.instrument_type == InstrumentType.Future:
			assert point_value is not None
			self.point_value = point_value
		else:
			self.point_value = 1
		self.df = pd.DataFrame()
Example #18
    def scale_ingredients_df(self, times):
        """
        Scale the recipe's ingredient amounts by a simple multiplier.

        :param times: scaling factor
        :return: scaled ingredient amounts
        """
        df = pd.DataFrame(self.recipe)
        ingredients = self.get_ingredients(df)
        scaled = ingredients['amount'] * times

        return scaled
Example #19
def unpacking_bike_numbers(column):
    """
    Get the unique list of bikes in `column`, where each cell holds a list of bike IDs.
    """
    bike_unpack = pd.DataFrame(df[column].tolist(), index=df.index)
    all_bikes = bike_unpack[0]

    # Stack every unpacked column onto the first one, then take the unique values.
    for c in bike_unpack.columns[1:]:
        all_bikes = pd.concat([all_bikes, bike_unpack[c].dropna()])
    all_bikes = all_bikes.unique()
    return all_bikes
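A small sketch with a made-up frame where each row holds a list of bike IDs; note that the function reads the module-level df:

import pandas as pd

df = pd.DataFrame({'bikes': [[101, 102], [102, 103], [104]]})
print(unpacking_bike_numbers('bikes'))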
Example #20
def budpredict():
    connection = pg.connect("host='" + DB['host'] + "' dbname=" +
                            DB['dbname'] + " user="******" password='******'password'] + "'")
    dl = pd.read_sql_query(
        "select table_name from table_meta where table_meta_id='" +
        request.json['table_meta_id'] + "';",
        con=connection)
    tname = dl['table_name'][0]
    target = request.json['target'][0]
    ohl = {
        'poutcome': ['poutcome_other', 'poutcome_success', 'poutcome_unknown'],
        'marital': ['marital_married', 'marital_single'],
        'education':
        ['education_secondary', 'education_tertiary', 'education_unknown'],
        'default': ['default_yes'],
        'housing': ['housing_yes'],
        'contact': ['contact_telephone', 'contact_unknown'],
        'month': [
            'month_aug', 'month_dec', 'month_feb', 'month_jan', 'month_jul',
            'month_jun', 'month_mar', 'month_may', 'month_nov', 'month_oct',
            'month_sep'
        ],
        'loan': ['loan_yes'],
        'y': ['y_yes'],
        'job': [
            'job_blue-collar', 'job_entrepreneur', 'job_housemaid',
            'job_management', 'job_retired', 'job_self-employed',
            'job_services', 'job_student', 'job_technician', 'job_unemployed',
            'job_unknown'
        ]
    }
    df = pd.DataFrame(request.json['coldata'])
    for cols in list(df.columns):
        if (cols in ohl.keys()):
            df_new = pd.DataFrame(columns=ohl[cols])
            for i, r in df.iterrows():
                df_new.loc[i] = [0] * len(ohl[cols])
                value = cols + '_' + df[cols][i]
                if (value in ohl[cols]):
                    df_new[value][i] = 1
            df = df.drop(columns=cols)
            df = pd.concat([df, df_new], axis=1, sort=True)
    with open('tpot_class_' + tname_nc0g1 + '.json', 'r') as fp:
        col = json.load(fp)
    mod = joblib.load('tpot_pipeline_' + tname + '_' + target + '.pkl')
    md = mod.predict(df)
    res = {'data': col[target][int(md[0])], 'Reply': 'Here you go'}
    return Response(response=json.dumps(res), status=200)
Example #21
def getMyRetweetsFavourites(number):
    data = getMyTweetsData(number)
    retweets = data[2::4]
    favourites = data[3::4]
    # Convert the string counts to integers before summing.
    favourites = [int(e) for e in favourites]
    retweets = [int(e) for e in retweets]
    sum_f = sum(favourites)
    sum_r = sum(retweets)
    d = {'retweets': [sum_r], 'favourites': [sum_f]}
    df = pd.DataFrame(data=d)
    return (['retweets', 'favourites'],
            [sum_r, sum_f])
Example #22
def openCsv(filePath, default=["new df here"]):
    """
    Returns the content of the csv file if it exists.

    Parameters:
    - filePath: the absolute or relative path to the .csv file
    - default: default value to load if the file is not located
    """
    try:
        content = pd.read_csv(filePath, error_bad_lines=False)
    except Exception:
        print(f"Exception: the file {filePath} you requested to open was not found.")
        content = pd.DataFrame(default)
        if int(input("Do you want to make a new file? 1 for yes, 0 for no: ")) == 1:
            content.to_csv(filePath, index=False, header=False)
    return content
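A possible call, pointing it at a file that may or may not exist yet (the file name is made up):

rides = openCsv('rides.csv', default=[['new df here']])
print(rides)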
Example #23
def read_all(filetype, startdir='.', recursive=True):
    '''
    Read in all files of a common type from a folder and concatenate them by row into one pandas dataframe.

    filetype  = glob-style pattern denoting which files to look for
    startdir  = directory to start looking in
    recursive = search subdirectories?
    '''
    frames = []  # DataFrame.append does not modify in place, so collect frames and concat once
    for dirpath, dirs, files in os.walk(startdir):
        if not recursive:
            dirs.clear()  # do not descend into subdirectories
        for file in fnmatch.filter(files, filetype):  # needs: import fnmatch
            contents = read_default(os.path.join(dirpath, file))
            contents['file'] = os.path.basename(file)
            frames.append(contents)

    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
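A possible call, assuming read_default is a thin wrapper around pd.read_csv (the helper and directory below are hypothetical):

import fnmatch
import os

import pandas as pd

def read_default(path):
    # hypothetical reader used by read_all above
    return pd.read_csv(path)

combined = read_all('*.csv', startdir='./logs', recursive=True)
print(combined['file'].unique())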
Example #24
 def build_comparison_table(self, exhibits):
     '''
     Makes a pandas data frame comparing exhibits by cosine distance.
     '''
     comparison = pd.DataFrame(columns=self.ex_list, index=self.ex_list)

     for ex in self.ex_list:
         comparison.loc[ex, ex] = 0.0
     for (ex1, ex2) in itertools.combinations(self.ex_list, 2):
         v1 = self.ex_vects[ex1[:2]][ex1]
         v2 = self.ex_vects[ex2[:2]][ex2]
         d = spatial.distance.cosine(v1, v2)
         comparison.loc[ex2, ex1] = d
         comparison.loc[ex1, ex2] = d

     return comparison
Example #25
def fetch_csv(directory='/home/', ticker='AMZN'):
    try:
        data = pd.read_csv('data-' + ticker + '.csv', sep='\t')
    except Exception:
        print('Reading csv for ticker %s failed' % ticker)
        try:
            AlexandreScraper.somethingScraper(directory=directory,
                                              ticker=ticker)
        except Exception:
            print('Scraping failed for ticker %s' % ticker)
            data = pd.DataFrame()
        else:
            data = pd.read_csv('data-' + ticker + '.csv', sep='\t')
    else:
        print('Read csv for ticker %s successful' % ticker)

    return data
Example #26
def predict_test():
	vector = pd.DataFrame([request.json])

	if database.query(vector.id):
		loaded_model = joblib.load(filename)
		res = str(loaded_model.predict(vector)[0])
		result = [
			{
				'id': 0,
				'prediction': res
			}
		]
		return jsonify(result)
	else:
Example #27
def dbupdate_contractfundamentals(db, stklst):
    # stklst[i] contains [fConId, fType, fSymbol, fLocalSymbol, fCurrency, fExchange, fTradingClass, fRating, fTradeType, fScanCode, fEpsNext, fFrac52wk, fBeta, fPE0, fDebtEquity, fEVEbitda, fPricetoFCFShare, fYield, fROE, fTargetPrice, fConsRecom, fProjEPS, fProjEPSQ, fProjPE]
    try:
        for i in range(len(stklst)):
            print('Updating contractfundamentals ' + str(stklst[i][0]))
            sql = "SELECT * FROM contractfundamentals WHERE fConId = '" + str(
                stklst[i][0]) + "' "
            rst = execute_query(db, sql)
            #print("rst   ",rst[0][2],rst[0][19])
            #print("stklsttttttttttt   ",stklst[i][19])

            if isinstance(stklst[i][19], float) and isinstance(rst[0][19], float):
                stklst[i][6] = stklst[i][19] / rst[0][19]
            else:
                stklst[i][6] = 0

            print("fSymbol  ", rst[0][2], "fTargetPrice  ", rst[0][19],
                  "newtargetprice   ", stklst[i][19])
            sql = "UPDATE contractfundamentals SET frating = 0"
            execute_query(db, sql)
            '''
            if stklst[i][5] == 0:
                
            elif stklst[i][5] == 'M':
                sql = "UPDATE trades SET tShares = tShares - " + str(stklst[i][3]) + " ,tActive = 0 where tId = " + str(stklst[i][0])
                execute_query(db, sql)
                sql = "INSERT INTO trades (tExecid, tAccId, tConId, tTime, tShares, tPrice, tCommission, tLiquidation, "
                sql = sql + "toptPrice, toptIV, toptDelta, toptGamma, toptVega, toptTheta, toptPVDividend, toptPriceOfUnderlying, tActive) "
                # For the new record, modify the Execid by prefixing a 'C', with tActive=1 and tShares = stklst[i][5]
                new_execid = 'C' + stklst[i][1]
                sql = sql + "SELECT '" + new_execid + "',tAccId, tConId, tTime," + str(stklst[i][3]) + ", tPrice, tCommission, tLiquidation, "
                sql = sql + "toptPrice, toptIV, toptDelta, toptGamma, toptVega, toptTheta, "
                sql = sql + "toptPVDividend, toptPriceOfUnderlying, 1 "  # active = 1
                sql = sql + "FROM trades WHERE tId = " + str(stklst[i][0])
                execute_query(db, sql)
            '''

        selectedstocks = pd.DataFrame(stklst)
        print("selectedStocks", selectedstocks)
    except Exception as err:
        print(err)
        raise
Example #28
def calculate(dataset_x, dataset_y):
    loss_list = ['benign', 'dos', 'probe', 'u2r', 'r2l']
    model.fit(dataset_x, dataset_y)
    model_predicted = model.predict(test_x)
    model_predicted = pd.DataFrame(model_predicted)
    model_predicted.columns = loss_list

    from sklearn.metrics import precision_score, mean_squared_error, f1_score
    precision_score = precision_score(test_y, model_predicted, average='micro')
    print("The precision at round ", round1, ' is ', precision_score)
    class_proba = model.predict_proba(test_x)

    loss_function = []

    for columns in loss_list:
        loss_function.append(
            mean_squared_error(test_y[columns], class_proba[columns]))

    average_loss = int(sum(loss_function) / 5)
    print("The loss in ", round1, ' is average_loss')
Example #29
def frame_filter(dataframe_col, year_repl):
    dataframe_col = pd.DataFrame(dataframe_col[year_repl].dropna())
    # Split the comma-separated strings into col_0 / col_1 (gender) / col_2 (count) columns.
    dataframe_col_re = dataframe_col[year_repl].str.split(
        ",", expand=True).add_prefix("col_")
    dataframe_col_re.iloc[:, -1] = dataframe_col_re.iloc[:, -1].apply(int)
    female_data = dataframe_col_re[dataframe_col_re["col_1"]
                                   == "F"].sort_values("col_2",
                                                       ascending=False).head()
    male_data = dataframe_col_re[dataframe_col_re["col_1"] == "M"].sort_values(
        "col_2", ascending=False).head()
    sum_year = dataframe_col_re["col_2"].sum()
    # Total count per gender.
    gender_sum = dataframe_col_re.groupby("col_1")["col_2"].sum()
    plt.pie([gender_sum["F"], gender_sum["M"]],
            explode=[0, 0],
            labels=["female", "male"],
            autopct="%1.1f%%")
    plt.axis("equal")
    return plt.show()
Example #30
def clean(dataframe):
    ''' 
	Cleans raw data. 
	'''
    if isinstance(dataframe, pd.DataFrame):
        # modify content here
        if ('question1' in dataframe.columns):
            dataframe['question1_raw'] = dataframe.question1
            dataframe['question1'] = clean_question(dataframe.question1)

        if ('question2' in dataframe.columns):
            dataframe['question2_raw'] = dataframe.question2
            dataframe['question2'] = clean_question(dataframe.question2)

        if ('is_duplicate' in dataframe.columns):
            series = dataframe.is_duplicate
            series = series.apply(pd.to_numeric)
            dataframe['is_duplicate'] = series

        if ('id' in dataframe.columns):
            series = dataframe.id
            series = series.apply(pd.to_numeric)
            dataframe['id'] = series

        if ('qid1' in dataframe.columns):
            series = dataframe.qid1
            series = series.apply(pd.to_numeric)
            dataframe['qid1'] = series

        if ('qid2' in dataframe.columns):
            series = dataframe.qid2
            series = series.apply(pd.to_numeric)
            dataframe['qid2'] = series

        return dataframe
    else:
        raise InputError('dataframe', 'input object is not a pd.DataFrame')
Example #31
	def get_daily_candle(self):
		'''Create daily candle data from the existing dataframe.'''
		self.daily_df = pd.DataFrame()
Example #32
unique = [k for k,g in itertools.groupby(sorted(toy))]
unique
toy
toy.idx
toy.ix
toy.index.values
toy[tnode]=toy.index.values
toy['tnode']=toy.index.values
toy['node']=toy.index.values
toy
itertools.groupby(AR)
itertools.groupby(toy.AR)
unique = [k for k,g in itertools.groupby(toy.AR)]
unique
unique.ix=unique.index.values
df = pd.dataframe(unique)
df = pd.DataFrame(unique)
df
df['k']=df.index.values
df
df.columns=['AR','k']
mg = pd.join(toy, df, on='AR')
mg = pd.merge(toy, df, on='AR')
mg
unique = [k for k,g in itertools.groupby()]
import readline
for i in range(readline.get_current_history_length()):
    print(readline.get_history_item(i))
unique = [k for k,g in itertools.groupby(mg)]
unique
itertools.groupby(mg).groups()
scores_nbFeatures_easy = []
scores_nbFeatures_hard = []
scores_PCA_easy = []
scores_PCA_hard = []

for nb_feature in number_features:
    PCA_model = PCA(nb_feature)
    X_tmp = PCA_model.fit_transform(X_train_facile)
    print(X_tmp.shape)
    score_easy = {}
    score_hard = {}
    for (name,func) in possible_distances:
        dist = distances_pairs(X_tmp, pairs_idx,func)
        score_facile , score_difficile = roc_report(pairs_label,dist,name,False)
        score_easy[name] = score_facile
        score_hard[name] = score_difficile
    scores_PCA_easy.append(score_easy)
    scores_PCA_hard.append(score_hard)
    print(pd.DataFrame(scores_PCA_easy))
    
plt.plot(number_features,scores_nbFeatures_easy,label='score facile')
plt.plot(number_features,scores_nbFeatures_hard,label='score difficile')
plt.legend(loc='best')
plt.savefig('Importance de la PCA.png')
plt.show()

'''
X_train_hard, Y_train_hard = readfile('data_train_difficile')
X_test_facile, Y_test_facile = readfile('data_test_facile')
X_test_hard, Y_test_hard = readfile('data_test_difficile')
'''
Example #34
def create(y_predict):
    y_dataframe = pd.DataFrame(y_predict)
    # Note: to_csv() without a path returns the CSV text instead of writing a file.
    y_dataframe.to_csv()
Example #35
# This is  my test file for

import pandas as pd

newDataframe = pd.DataFrame()

print(type(newDataframe))