Code Example #1
File: parsers.py Project: daoos/old-stats-parser
    def _get_value(self):
        """Parse values based on "m$n", ";" and ")" delimiters"""
        RV = []

        # first value, between "m$n" and ";"
        # +4 skips the 3-char "m$n" delimiter plus one space
        value_start = self.row[0].find("m$n") + 4
        value_end = self.row[0].find(";")

        # get substring, convert to float and append
        str_value = self.row[0][value_start:value_end]
        float_value = convert_to_float(str_value)
        RV.append(float_value)

        # second value, between last "m$n" and ")"
        # +4 skips the 3-char "m$n" delimiter plus one space
        value_start = self.row[0].rfind("m$n") + 4
        value_end = self.row[0].find(")")

        # get substring, convert to float and append
        str_value = self.row[0][value_start:value_end]
        float_value = convert_to_float(str_value)
        RV.append(float_value)

        return RV
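A minimal sketch of the row format this parser appears to expect; the sample string is hypothetical, invented only to illustrate the "m$n" / ";" / ")" delimiters:

    # hypothetical row; real rows come from old-stats-parser's scraped tables
    row = ["Wheat (m$n 12.5; m$n 14.75)"]

    first = row[0][row[0].find("m$n") + 4:row[0].find(";")]    # -> "12.5"
    second = row[0][row[0].rfind("m$n") + 4:row[0].find(")")]  # -> "14.75"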
Code Example #2
File: guosen_utils.py Project: dxcv/VectorTrader
def get_rights_issue(ticker, start_date, end_date):
    '''
    Fetch a stock's executed rights-issue data.

    Parameters
    ----------
    ticker
        e.g. '600340'
    start_date
        e.g. '20100101'
    end_date
        e.g. '20150101'
    Returns
    -------
    DataFrame
        index: ex_rights_date
        columns:
            'ex_rights_date', 'rights_issue_per_stock', 'rights_issue_price',
            'transfer_rights_issue_per_stock', 'transfer_price'
            (ex-rights date, rights issued per share, rights-issue price,
             transfer rights per share, transfer price per share)
    '''
    sql_select = '''
    SELECT [除权日]
      ,[配股比例分子]
      ,[配股价格]
      ,[转配比例分子]
      ,[转让费]
      FROM [BasicData].[dbo].[Yi_RightsIssue]
      WHERE [除权日] is not null
      AND [stockcode] = '%s'
    ''' % (ticker)
    cur.execute(sql_select)
    data = cur.fetchall()
    columns = [
        'ex_rights_date', 'rights_issue_numerator', 'rights_issue_price',
        'transfer_rights_issue_numerator', 'transfer_rights_issue_fee'
    ]
    df = pd.DataFrame(data, columns=columns)
    df = df.fillna(0)
    df['ex_rights_date'] = matlab_time_convert(df['ex_rights_date'])
    df['rights_issue_price'] = convert_to_float(df['rights_issue_price'])
    df['rights_issue_per_stock'] = \
            convert_to_float(df['rights_issue_numerator']) / 10.0
    df['transfer_rights_issue_per_stock'] = \
            convert_to_float(df['transfer_rights_issue_numerator']) / 10.0
    df['transfer_price'] = \
            df['rights_issue_price'] + convert_to_float(df['transfer_rights_issue_fee'])
    df = df[[
        'ex_rights_date', 'rights_issue_per_stock', 'rights_issue_price',
        'transfer_rights_issue_per_stock', 'transfer_price'
    ]]
    df = df.set_index('ex_rights_date', drop=False)
    df = df.sort_index()
    df = df[start_date:end_date]
    return df
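The ticker is interpolated straight into the SQL string above (the same pattern appears in get_dividend in Example #4 below), which is brittle against quoting and injection. A safer sketch, assuming the module-level cur is a DB-API cursor whose paramstyle is 'format', e.g. pymssql; pyodbc would use ? placeholders instead:

    sql_select = '''
    SELECT [除权日], [配股比例分子], [配股价格], [转配比例分子], [转让费]
    FROM [BasicData].[dbo].[Yi_RightsIssue]
    WHERE [除权日] IS NOT NULL AND [stockcode] = %s
    '''
    cur.execute(sql_select, (ticker,))  # the driver quotes the parameter itself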
Code Example #3
def update_rover(Rover, data):
    # Retrieve current kinematic and control values
    Rover.vel = convert_to_float(data["speed"])  # meters/sec
    Rover.pos = [
        convert_to_float(pos.strip()) for pos in data["position"].split(';')
    ]
    Rover.yaw = convert_to_float(data["yaw"])
    Rover.pitch = convert_to_float(data["pitch"])
    Rover.roll = convert_to_float(data["roll"])
    Rover.throttle = convert_to_float(data["throttle"])
    Rover.steer = convert_to_float(data["steering_angle"])

    # Initialize start time and sample positions
    if Rover.start_time is None:
        Rover.start_time = time.time()
        Rover.total_time = 0
        samples_xpos = np.int_([
            convert_to_float(pos.strip())
            for pos in data["samples_x"].split(';')
        ])
        samples_ypos = np.int_([
            convert_to_float(pos.strip())
            for pos in data["samples_y"].split(';')
        ])
        Rover.samples_pos = (samples_xpos, samples_ypos)
        Rover.samples_to_find = int(data['sample_count'])  # np.int was removed in NumPy 1.24; use the builtin
        Rover.homePos = (Rover.pos[0], Rover.pos[1])
        Log("Home=" + str(Rover.homePos))
    else:
        tot_time = time.time() - Rover.start_time
        if np.isfinite(tot_time):
            Rover.total_time = tot_time

    # Near sample flag
    Rover.near_sample = int(data["near_sample"])
    # Picking up flag
    Rover.picking_up = int(data["picking_up"])

    # Update number of rocks collected
    Rover.samples_collected = Rover.samples_to_find - int(data["sample_count"])

    # Get the current image from the center camera of the rover
    imgString = data["image"]
    image = Image.open(BytesIO(base64.b64decode(imgString)))
    Rover.img = np.asarray(image)

    # Create dict container for various processed images
    Rover.procImage = {}
    Rover.procImage['POVRaw'] = Rover.img

    #LogRoverState(Rover, data)
    # Return updated Rover and separate image for optional saving
    return Rover, image
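The telemetry keys read above imply a payload shaped like the following; a hypothetical example for exercising update_rover (keys taken from the code, values invented):

    data = {
        "speed": "0.2", "position": "99.7;85.6", "yaw": "0.0",
        "pitch": "0.0", "roll": "0.0", "throttle": "0.0",
        "steering_angle": "0.0", "near_sample": "0", "picking_up": "0",
        "sample_count": "6", "samples_x": "10;20;30", "samples_y": "5;15;25",
        "image": "...",  # base64-encoded camera frame, elided here
    }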
Code Example #4
File: guosen_utils.py Project: dxcv/VectorTrader
def get_dividend(ticker, start_date, end_date):
    '''
    Fetch a stock's dividend, bonus-share, and capitalization events executed
    within the date range.

    Parameters
    ----------
    ticker
        e.g. '600340'
    start_date
        e.g. '20100101'
    end_date
        e.g. '20150101'
    Returns
    -------
    DataFrame
        index: XD_date
        columns: XD_date, dividend_per_share, multiplier
            (ex-dividend date, dividend per share, per-share multiplier after
             the distribution)
    '''
    sql_select = '''
    SELECT [stockcode]
      ,[除权除息日]
      ,[送股比例分子]
      ,[转增比例分子]
      ,[派息比例分子_税后]
    FROM BasicData.dbo.Yi_Dividend 
    WHERE stockcode = '%s'
    AND [numtime] is not Null
    ''' % (ticker)
    cur.execute(sql_select)
    data = cur.fetchall()
    columns = [
        'ticker', 'XD_date', 'stock_dividend_numerator', 'transfer_numerator',
        'cash_dividend_numerator_after_tax'
    ]

    df = pd.DataFrame(data, columns=columns)
    df = df.fillna(0)
    df['dividend_per_share'] = convert_to_float(
        df[u'cash_dividend_numerator_after_tax']) / 10.0
    df['multiplier'] = 1 + (convert_to_float(df[u'stock_dividend_numerator']) + \
                                                  convert_to_float(df[u'transfer_numerator'])) / 10.0
    df['XD_date'] = matlab_time_convert(df['XD_date'])
    df = df[['XD_date', 'dividend_per_share',
             'multiplier']].set_index('XD_date', drop=False)
    df = df.sort_index()
    df = df[start_date:end_date]
    return df
Code Example #5
    def __init__(self, elem_lst):
        self.cons_no = elem_lst[1]              # customer number (join key)
        self.cons_id = elem_lst[0]              # customer ID

        self.cons_sort_code = elem_lst[10]      # customer category
        if elem_lst[10] not in sort_dict:
            self.cons_sort_code = "others"

        self.contract_cap = utils.convert_to_float(elem_lst[5])  # contracted capacity

        self.elec_addr = elem_lst[2]            # service address
        self.elec_type_code = elem_lst[4]       # electricity-use category

        self.hec_industry_code = elem_lst[7]    # energy-intensive industry category
        if len(elem_lst[7]) == 0:
            self.hec_industry_code = "null"

        self.load_attr_code = elem_lst[6]       # load attribute
        if len(elem_lst[6]) == 0:
            self.load_attr_code = "null"

        self.org_no = elem_lst[9]               # power-supply unit number

        self.status_code = elem_lst[8]          # customer status
        if len(elem_lst[8]) == 0:
            self.status_code = "null"

        self.trade_code = elem_lst[3]           # industry classification
        if elem_lst[3] not in trade_dict:
            self.trade_code = "others"

        self.urban_rurl_flag = elem_lst[11]     # urban/rural flag
        if len(elem_lst[11]) == 0:
            self.urban_rurl_flag = "null"
Code Example #6
def read_stage_length(stage_soup):
    ## strip the wrapping parentheses and the "k" unit suffix so the text converts to a float
    stage_info = stage_soup.find('div', class_='entryHeader').find('h2')
    stage_length = convert_to_float(
        stage_info.find_all('span')[2].text.replace('(', '').replace(
            ')', '').replace('k', ''))
    return stage_length
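A hypothetical fragment of the markup this scraper assumes (the entryHeader class comes from the code above; the stage data is invented):

    from bs4 import BeautifulSoup

    html = '''<div class="entryHeader">
      <h2><span>Stage 4</span> <span>Town A to Town B</span> <span>(156.5k)</span></h2>
    </div>'''
    print(read_stage_length(BeautifulSoup(html, 'html.parser')))  # -> 156.5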
Code Example #7
def main():

    dirname = os.path.dirname(__file__)
    output_dirname = os.path.join(dirname, 'results')

    os.makedirs(output_dirname, exist_ok=True)

    file_name = sys.argv[1]
    dirname = os.path.dirname(__file__)
    file_name = os.path.join(dirname, file_name)

    d = DataSet(file_name)
    d.loadDataSet()

    to_remove = [
        d.data_set[0].index('Index'),
        d.data_set[0].index('First Name'),
        d.data_set[0].index('Last Name'),
        d.data_set[0].index('Birthday'),
        d.data_set[0].index('Best Hand'),
        d.data_set[0].index('Hogwarts House'),

        # Tests 7/10/18
        d.data_set[0].index('Arithmancy'),
        d.data_set[0].index('Defense Against the Dark Arts'),
        d.data_set[0].index('Divination'),
        d.data_set[0].index('Muggle Studies'),
        d.data_set[0].index('History of Magic'),
        d.data_set[0].index('Transfiguration'),
        d.data_set[0].index('Potions'),
        d.data_set[0].index('Care of Magical Creatures'),
        d.data_set[0].index('Charms'),
        d.data_set[0].index('Flying'),
    ]

    X = np.array([[
        d.data_set[i][j] for j in range(len(d.data_set[0]))
        if j not in to_remove
    ] for i in range(len(d.data_set))])
    #features = X[0,:]
    X = convert_to_float(X[1:, ])

    y_col_nb = d.data_set[0].index('Hogwarts House')
    y = np.array(d.extractColumn(y_col_nb)[1:])

    m = MeanImputation(X)
    m.train()
    m.transform()

    sc = Scaling(X)
    sc.train()
    sc.transform()

    l = LogisticRegression(X=X, y=y)
    l.train()
Code Example #8
File: parsers.py Project: daoos/old-stats-parser
    def _get_value(self):
        """Parse values from tbl_row, last two cells in row."""
        value_1945 = None
        value_1946 = None

        # a value that can't be converted to a float is treated as missing
        try:
            value_1945 = convert_to_float(self.row[3])
        except Exception:
            pass

        # a value that can't be converted to a float is treated as missing
        try:
            value_1946 = convert_to_float(self.row[4])
        except Exception:
            pass

        return [value_1945, value_1946]
Code Example #9
File: parsers.py Project: daoos/old-stats-parser
    def _get_quantity(self):
        """Parse quantities from tbl_row, first two cells after country name."""
        quantity_1945 = None
        quantity_1946 = None

        # a value that can't be converted to a float is treated as missing
        try:
            quantity_1945 = convert_to_float(self.row[1])
        except Exception:
            pass

        # a value that can't be converted to a float is treated as missing
        try:
            quantity_1946 = convert_to_float(self.row[2])
        except Exception:
            pass

        return [quantity_1945, quantity_1946]
Code Example #10
    def get_trial_metadata_dictionaries_partial(self, experiment_number, experimentdict):
        '''Create a dictionary of dictionaries organized as:
        {Trial-#: {'evid': 12345, 'magnitude': 4.02, 'distance': 124}}.
        This makes the stored information much easier to parse, using a double
        index to get a specific piece of information about a specific trial.'''
        experimentId        = experiment_id[experiment_number]
        cache_evid_dict     = {}
        cache_ml_dict       = {}
        cache_distance_dict = {}
        for trial in experimentdict:
            requestDict     = self.make_request("GET", "/REST/Project/353/Experiment/%s/Trial/%s" % (experimentId, experimentdict[trial]))
            data            = requestDict['data']
            evid            = utils.parse_description('evid:', data)
            magnitude       = utils.parse_description('ml:', data)
            distance        = utils.parse_description('distance:', data)
            cache_evid_dict[trial]     = utils.convert_to_long(evid)
            cache_ml_dict[trial]       = utils.convert_to_float(magnitude)
            cache_distance_dict[trial] = utils.convert_to_float(distance)
            nees_logging.log_cache_invalid_cache_variables(trial, cache_evid_dict[trial], cache_ml_dict[trial], cache_distance_dict[trial])
        return cache_evid_dict, cache_ml_dict, cache_distance_dict
Code Example #11
    def __init__(self, elem_lst):
        self.cons_no = elem_lst[0]  # customer number

        self.ymrcvbl_ym = utils.convert_to_date_YM('09_ARC_A_RCVBL_FLOW.TSV',
                                                   elem_lst[1])  # receivable year-month

        self.org_no = elem_lst[2]  # power-supply unit number
        self.pay_code = elem_lst[3]  # payment method

        self.t_pq = utils.convert_to_float(elem_lst[4])  # total power quantity

        self.rcvbl_amt = utils.convert_to_float(elem_lst[5])  # receivable amount

        self.rcved_amt = utils.convert_to_float(elem_lst[6])  # received amount

        self.status_code = elem_lst[7]  # fee status

        self.rcvbl_penalty = utils.convert_to_float(elem_lst[8])  # receivable penalty

        self.rcved_penalty = utils.convert_to_float(elem_lst[9])  # received penalty

        self.risk_level_code = elem_lst[10]  # risk level

        self.owe_amt = utils.convert_to_float(elem_lst[11])  # owed electricity charge

        self.cons_sort_code = elem_lst[12]  # customer category
        self.elec_type_code = elem_lst[13]  # electricity-use category
        self.ctl_mode = elem_lst[14]  # fee-control method
Code Example #12
File: http.py Project: Valvador/NEEShubloader
def get_trial_metadata_dictionaries_partial(project_id, experiment_id, experimentdict):
    '''Create a dictionary of dictionaries organized as:
    {Trial-#: {'evid': 12345, 'magnitude': 4.02, 'distance': 124}}.
    This makes the stored information much easier to parse, using a double
    index to get a specific piece of information about a specific trial.'''
    cache_evid_dict     = {}
    cache_ml_dict       = {}
    cache_distance_dict = {}
    for trial in experimentdict:
        request             = "%s%s/Experiment/%s/Trial/%s" % (neeshub_project_path, 
                                                               project_id, 
                                                               experiment_id, 
                                                               experimentdict[trial])
        authentic_request   = utils.authenticate_request(request)
        requestDict         = conn.request('GET', authentic_request)
        data                = requestDict['data']
        evid                = utils.parse_description('evid:', data)
        magnitude           = utils.parse_description('ml:', data)
        distance            = utils.parse_description('distance:', data)
        cache_evid_dict[trial]        = utils.convert_to_long(evid)
        cache_ml_dict[trial]          = utils.convert_to_float(magnitude)
        cache_distance_dict[trial]    = utils.convert_to_float(distance)
        nees_logging.log_cache_invalid_cache_variables(trial, cache_evid_dict[trial],cache_ml_dict[trial],cache_distance_dict[trial])
    return cache_evid_dict, cache_ml_dict, cache_distance_dict
Code Example #13
    def standardize_value(self, cleansed_dict, ingredient_texts):
        uom_indxs = [
            i for i, v in enumerate(cleansed_dict['tokens'])
            if v['type'] == 'unit_of_measure'
        ]
        for indx, uom_indx in enumerate(uom_indxs):
            try:
                uom = cleansed_dict['tokens'][uom_indx]['standard_token']
                ing = ingredient_texts[indx]
                # The value usually sits just before the unit of measure,
                # so start from the token one position earlier
                uom_value_indx = uom_indx - 1
                value = cleansed_dict['tokens'][uom_value_indx][
                    'standard_token']
                vtype = cleansed_dict['tokens'][uom_value_indx]['type']
                if vtype != 'value':
                    # Getting the previous token if the current token
                    # is not of type "value"
                    uom_value_indx = uom_value_indx - 1
                    if cleansed_dict['tokens'][uom_value_indx][
                            'type'] == 'value':
                        value = cleansed_dict['tokens'][uom_value_indx][
                            'standard_token']
                    else:
                        continue

                if not value:
                    continue

                sku = ing + '_' + uom
                std_values_dict = self.valid_skus.get(sku, {})
                std_values = sorted(std_values_dict.keys())
                float_value = convert_to_float(value)
                if not float_value:
                    continue

                if float_value not in std_values:
                    for i, v in enumerate(std_values):
                        if float_value < v:
                            v = std_values_dict[v]
                            uom_dict = cleansed_dict['tokens'][uom_value_indx]
                            uom_dict["standard_token"] = v
                            if indx == 0:
                                cleansed_dict['unit_of_measure'] = uom
                                cleansed_dict['unit_of_measure_value'] = v
                            break
            except IndexError:
                continue

        return cleansed_dict
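The cleansed_dict shape this method assumes can be read off the lookups above; a hypothetical minimal input (keys from the code, values invented):

    cleansed_dict = {
        'ingredient': 'flour',
        'unit_of_measure': 'kg',
        'unit_of_measure_value': '2',
        'tokens': [
            {'type': 'ingredient',      'standard_token': 'flour'},
            {'type': 'value',           'standard_token': '2'},
            {'type': 'unit_of_measure', 'standard_token': 'kg'},
        ],
    }
    ingredient_texts = ['flour']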
Code Example #14
    def standardize_value_old(self, cleansed_dict, ingredient_texts):
        ingredient = cleansed_dict['ingredient']
        uom = cleansed_dict['unit_of_measure']
        value = cleansed_dict['unit_of_measure_value']
        if not value:
            return cleansed_dict

        sku = ingredient + '_' + uom
        std_values_dict = self.valid_skus.get(sku, {})
        std_values = sorted(std_values_dict.keys())
        float_value = convert_to_float(value)
        if float_value not in std_values:
            for i, v in enumerate(std_values):
                if float_value < v:
                    value = std_values_dict[v]
                    break

        if cleansed_dict['unit_of_measure_value'] != value:
            uom_indxs = [
                i for i, v in enumerate(cleansed_dict['tokens'])
                if v['type'] == 'unit_of_measure'
            ]
            for i, uom_indx in enumerate(uom_indxs):
                try:
                    uom = cleansed_dict['tokens'][uom_indx]['standard_token']
                    ing = ingredient_texts[i]
                    if sku == (ing + '_' + uom):
                        uom_value_indx = uom_indx - 1
                        uom_dict = cleansed_dict['tokens'][uom_value_indx]
                        uom_dict["standard_token"] = value
                        cleansed_dict['tokens'][uom_value_indx] = uom_dict
                        if i == 0:
                            cleansed_dict['unit_of_measure_value'] = value
                except IndexError:
                    break

        return cleansed_dict
Code Example #15
    def _make_profit_statements(self, code, years):
        self.profits = ts.get_profit_statement(code)
        self.profit_statements = {
            year: utils.convert_to_float(self.profits[year])
            for year in years
        }
Code Example #16
def main():
    '''
    Use this script to run experiments and fine-tune the algorithms.
    '''

    # Load the dataset
    file_name = sys.argv[1]
    dirname = os.path.dirname(__file__)
    file_name = os.path.join(dirname, file_name)

    d = DataSet(file_name)
    d.loadDataSet()

    # Remove useless features (not numeric + bad regressors).
    to_remove = [
        d.data_set[0].index('Index'),
        d.data_set[0].index('First Name'),
        d.data_set[0].index('Last Name'),
        d.data_set[0].index('Birthday'),
        d.data_set[0].index('Best Hand'),
        d.data_set[0].index('Hogwarts House'),

        # Tests 7/10/18
        d.data_set[0].index('Arithmancy'),
        d.data_set[0].index('Defense Against the Dark Arts'),
        d.data_set[0].index('Divination'),
        d.data_set[0].index('Muggle Studies'),
        d.data_set[0].index('History of Magic'),
        d.data_set[0].index('Transfiguration'),
        d.data_set[0].index('Potions'),
        d.data_set[0].index('Care of Magical Creatures'),
        d.data_set[0].index('Charms'),
        d.data_set[0].index('Flying'),
    ]

    X = np.array([[
        d.data_set[i][j] for j in range(len(d.data_set[0]))
        if j not in to_remove
    ] for i in range(len(d.data_set))])
    X = convert_to_float(X[1:, ])

    y_col_nb = d.data_set[0].index('Hogwarts House')
    y = np.array(d.extractColumn(y_col_nb)[1:])

    # Impute missing values
    m = MeanImputation(X)
    m.train()
    m.transform()

    # Scale the variables
    sc = Scaling(X)
    sc.train()
    sc.transform()

    # Split the dataset in a training and testing set
    sp = SplitTrainTest(X, y)
    sp.Split()
    X_train = sp.X_train
    y_train = sp.y_train
    X_test = sp.X_test
    y_test = sp.y_test

    # Train a logistic regression model
    l = LogisticRegression(X=X_train, y=y_train)
    l.train()

    # Compute the confusion matrix over the training set
    y_predicted = l.predict()

    cm1 = ConfusionMatrix(y_train, y_predicted)
    cm1.getMatrix()
    print('\n\n')
    print(
        '**************** Confusion Matrix on the training set ****************'
    )
    print('\n')
    cm1.Print()

    # Compute the confusion matrix over the testing set
    y_predicted = l.predict(X_test)

    cm2 = ConfusionMatrix(y_test, y_predicted, cm1.unique_labels)
    cm2.getMatrix()
    print('\n\n')
    print(
        '**************** Confusion Matrix on the testing set ****************'
    )
    print('\n')
    cm2.Print()
Code Example #17
def main():

    file_name = sys.argv[1]
    dirname = os.path.dirname(__file__)
    file_name = os.path.join(dirname, file_name)

    d = DataSet(file_name)
    d.loadDataSet()

    to_remove = [
        d.data_set[0].index('Index'),
        d.data_set[0].index('First Name'),
        d.data_set[0].index('Last Name'),
        d.data_set[0].index('Birthday'),
        d.data_set[0].index('Best Hand'),
        d.data_set[0].index('Hogwarts House'),

        # Tests 7/10/18
        d.data_set[0].index('Arithmancy'),
        d.data_set[0].index('Defense Against the Dark Arts'),
        d.data_set[0].index('Divination'),
        d.data_set[0].index('Muggle Studies'),
        d.data_set[0].index('History of Magic'),
        d.data_set[0].index('Transfiguration'),
        d.data_set[0].index('Potions'),
        d.data_set[0].index('Care of Magical Creatures'),
        d.data_set[0].index('Charms'),
        d.data_set[0].index('Flying'),
    ]

    X = np.array([[
        d.data_set[i][j] for j in range(len(d.data_set[0]))
        if j not in to_remove
    ] for i in range(len(d.data_set))])
    X = convert_to_float(X[1:, ])

    y_col_nb = d.data_set[0].index('Hogwarts House')
    y = np.array(d.extractColumn(y_col_nb)[1:])

    m = MeanImputation(X)
    m.train()
    m.transform()

    sc = Scaling(X)
    sc.train()
    sc.transform()

    sp = SplitTrainTest(X, y)
    sp.Split()
    X_train = sp.X_train
    y_train = sp.y_train
    X_test = sp.X_test
    y_test = sp.y_test

    l = LogisticRegression(X=X_train,
                           y=y_train,
                           optimizer='sgd',
                           optimizer_params={
                               'alpha': 0.5,
                               'n': 5,
                               'batch_size': 16
                           })
    l.train()

    y_predicted = l.predict()

    cm1 = ConfusionMatrix(y_train, y_predicted)
    cm1.getMatrix()
    print('\n\n')
    print(
        '**************** Confusion Matrix on the training set ****************'
    )
    print('\n')
    cm1.Print()

    y_predicted = l.predict(X_test)

    cm2 = ConfusionMatrix(y_test, y_predicted, cm1.unique_labels)
    cm2.getMatrix()
    print('\n\n')
    print(
        '**************** Confusion Matrix on the testing set ****************'
    )
    print('\n')
    cm2.Print()
Code Example #18
    def latest_equity(self):
        bs = utils.convert_to_float(self.balance_sheet.balances.iloc[:, 1])
        return bs[bsheet.balance_sheet_index['equity']]
Code Example #19
import pandas as pd
import numpy as np
from utils import convert_ids, convert_to_float, to_json
movies_metadata_df1 = pd.read_csv('../data/movies_metadata.csv'
                                 , converters={ 'id': lambda x: convert_ids(x)
                                               , 'imdb_id': lambda x: convert_ids(x)
                                               ,'popularity': lambda x: convert_to_float(x)
                                               ,'genres': lambda x: to_json(x)}
                                 , usecols=['id', 'original_title'
                                                , 'genres' #'homepage'
                                                , 'overview', 'popularity', 'poster_path'
                                                , 'release_date', 'revenue', 'runtime'
                                                , 'spoken_languages', 'title'
                                                , 'vote_average', 'vote_count']
                                , parse_dates=True, low_memory=False)  # the misspelled dtype={'populariy': ...} matched no column and is dropped; the 'popularity' converter above already yields floats


movies_lookup_df = pd.read_csv('../data/movies_metadata.csv'
                        , converters={'id': lambda x: convert_ids(x), 'imdb_id': lambda x: convert_ids(x)}
                       ,usecols=['id', 'title'], low_memory=False)

#####################################
##SVD DATA SET
movies_df = pd.read_csv('../data/movies_metadata.csv'
                        , converters={'id': lambda x: convert_ids(x), 'imdb_id': lambda x: convert_ids(x)}
                       ,usecols=['id', 'original_title', 'belongs_to_collection'
                                 , 'budget', 'genres', 'homepage'
                                 ,'imdb_id', 'overview', 'popularity', 'poster_path'
                                 , 'production_companies','release_date', 'revenue', 'runtime',
                                 'spoken_languages', 'status', 'tagline', 'title', 'video',
                                 'vote_average', 'vote_count']
                        , low_memory=False)  # NOTE: the call was truncated in the source; this close is an assumed completion mirroring the reads above
Code Example #20
File: tests.py Project: leyyin/university
def test_convert_to_float():
    assert utils.convert_to_float("23.45") == 23.45
    assert utils.convert_to_float("0") == 0.0
    assert utils.convert_to_float("-0") == 0.0
    assert utils.convert_to_float("-23") == -23.0
    assert utils.convert_to_float(245) == 245.0
    assert utils.convert_to_float(-245) == -245.0
    assert utils.convert_to_float(-245.8457) == -245.8457
    assert utils.convert_to_float(0) == 0.0
    assert utils.convert_to_float("43.53") == 43.53
    assert utils.convert_to_float("25sn") is None
    assert utils.convert_to_float("s45") is None
    assert utils.convert_to_float("string") is None
Code Example #21
def main():

    dirname = os.path.dirname(__file__)
    dirname_prediction = os.path.join(dirname, 'results')

    file_name = sys.argv[1]
    file_name = os.path.join(dirname, file_name)

    d = DataSet(file_name)
    d.loadDataSet()

    to_remove = [
        d.data_set[0].index('Index'),
        d.data_set[0].index('First Name'),
        d.data_set[0].index('Last Name'),
        d.data_set[0].index('Birthday'),
        d.data_set[0].index('Best Hand'),
        d.data_set[0].index('Hogwarts House'),

        # Tests 7/10/18
        d.data_set[0].index('Arithmancy'),
        d.data_set[0].index('Defense Against the Dark Arts'),
        d.data_set[0].index('Divination'),
        d.data_set[0].index('Muggle Studies'),
        d.data_set[0].index('History of Magic'),
        d.data_set[0].index('Transfiguration'),
        d.data_set[0].index('Potions'),
        d.data_set[0].index('Care of Magical Creatures'),
        d.data_set[0].index('Charms'),
        d.data_set[0].index('Flying'),
    ]

    index_position = d.data_set[0].index('Index')
    indexes = np.array(
        [d.data_set[i][index_position] for i in range(len(d.data_set))])[1:]

    X = np.array([[
        d.data_set[i][j] for j in range(len(d.data_set[0]))
        if j not in to_remove
    ] for i in range(len(d.data_set))])
    #features = X[0,:]
    X = convert_to_float(X[1:, ])

    m = MeanImputation(X,
                       path_to_mean_imputation=os.path.join(
                           dirname_prediction, 'mean_imputation.json'))
    m.transform()

    sc = Scaling(X,
                 path_to_scaling=os.path.join(dirname_prediction,
                                              'scaling.json'))
    sc.transform()

    l = LogisticRegression(X=X,
                           path_to_beta=os.path.join(dirname_prediction,
                                                     'beta.json'))
    predictions = l.predict()

    dirname = os.path.dirname(__file__)
    file_name = os.path.join(dirname, 'resources/houses.csv')
    with open(file_name, 'w+') as outfile:
        writer = csv.writer(outfile, delimiter=',')
        writer.writerow(['Index', 'Hogwarts House'])
        for i in range(len(indexes)):
            writer.writerow([indexes[i], predictions[i]])
Code Example #22
    def _make_cash_flows(self, code, years):
        cash_flows = ts.get_cash_flow(code)
        self.cash_flows = {
            year: utils.convert_to_float(cash_flows[year])
            for year in years
        }
Code Example #23
File: balance_sheet.py Project: JackSunshine/zebra
    def _make_balance_sheets(self, code, years):
        self.balances = ts.get_balance_sheet(code)
        self.balance_sheets = {
            year: utils.convert_to_float(self.balances[year])
            for year in years
        }
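Examples #15, #22, and #23 all come from the same zebra project and repeat one pattern: fetch a tushare statement frame and coerce the requested year columns to floats. A hypothetical refactor of that shared pattern (the free-function name is invented):

    # 'fetch' stands for any of ts.get_balance_sheet / ts.get_profit_statement /
    # ts.get_cash_flow from the examples above
    def make_float_statements(fetch, code, years):
        frame = fetch(code)
        return {year: utils.convert_to_float(frame[year]) for year in years}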