def fetchQuotes(sym, start=FROM_DATE, end=CURRENT_DATE):
    """Fetch historical and current quote data for one symbol.

    Parameters
    ----------
    sym
        Symbol passed to ``ystockquote``; also written into the ``symbol``
        column of the historical frame.
    start, end
        Date bounds forwarded unchanged to
        ``ystockquote.get_historical_prices``. They default to the
        module-level ``FROM_DATE`` and ``CURRENT_DATE``.

    Returns
    -------
    tuple
        ``(his, daily)``. ``his`` is a DataFrame of historical quotes
        indexed by date, or ``None`` when the historical request failed or
        returned nothing. ``daily`` is whatever ``ystockquote.get_all(sym)``
        returns.
    """
    his = None
    data = None
    try:
        data = ystockquote.get_historical_prices(sym, start, end)
    except Exception:
        # Deliberate best effort: a failed historical request (typically a
        # 404 because the data is not available yet) must not prevent the
        # current quote from being fetched below.
        print("Please check the dates. Data might not be available. 404 returned")

    if data:
        # Sort the entries by key, then transpose so each key becomes a row.
        his = DataFrame(collections.OrderedDict(sorted(data.items()))).T
        his = his.convert_objects(convert_numeric=True)
        his.index = pd.to_datetime(his.index)
        his.insert(0, 'symbol', sym, allow_duplicates=True)
        # Expose the date as a regular column as well as the index.
        his.insert(1, 'date', his.index)
        # Column names come from the NoSQL helper; the db-backed getColumns
        # was dropped when the database dependency was removed.
        his.columns = getColumnsNoSql('stock_quote_historical')

    daily = ystockquote.get_all(sym)

    return his, daily
Example #2
def get_hosts():
    """Read ``./hosts.csv`` and return its rows as a numeric-coerced DataFrame.

    The path is printed before reading and the resulting frame is printed
    before it is returned. The first row read from the file is discarded.

    NOTE(review): ``convert_objects`` is deprecated in later pandas releases
    and removed in pandas 1.0; this function only runs on older versions.
    """
    source_path = "./hosts.csv"
    print("Hosts input file: %s" % source_path)

    column_names = ['#', 'Provider', 'Region', 'CPUs', 'Memory',
                    'Storage', 'DiskType', 'OS', 'Cost', 'Comment']
    raw_table = pd.read_csv(source_path, sep=',', names=column_names)

    # Skip the first row -- presumably the file's own header line, which is
    # read as data because explicit column names are supplied (confirm).
    table = DataFrame(raw_table)[1:]
    table = table.convert_objects(convert_numeric=True)
    print(table)
    return table
            value_quant = observation.find('{}valueQuantity')
            value = value_quant.find('{}value')
    if (not systolic_bps) & (not diastolic_bps):
        #print "No Systolic/Diastolic BP"
    if((len(encounter_dates)!=len(systolic_bps)) | (len(systolic_bps) != len(diastolic_bps))):

    enc_dict = {}
    enc_dict['encounter_date'] = encounter_dates
    enc_dict['systolic_bp'] = systolic_bps
    enc_dict['diastolic_bp'] = diastolic_bps
    encounters = DataFrame(enc_dict, columns=['encounter_date', 'diastolic_bp', 'systolic_bp'])
    encounters = encounters.convert_objects(convert_dates='coerce', convert_numeric=True)
    #print encounters
    #encounters.is_copy = False
    enc_period = encounters[(encounters.encounter_date.dt.year >= 2004) & (encounters.encounter_date.dt.year <= 2009)]
    if enc_period.empty:
        #print "No data between given period"
    enc_period['mean_bp'] = enc_period['diastolic_bp']
    #+ ((enc_period['systolic_bp']-enc_period['diastolic_bp'])/3)
    mbp = enc_period['mean_bp'].mean()
    sbp = enc_period['mean_bp'].std()
    if math.isnan(sbp):
    count = count +1
Example #4
def _data_to_frame(data, header, index_col, infer_types, skiprows):
    """Parse a BeautifulSoup table into a DataFrame.

    data : tuple of lists
        The raw data to be placed into a DataFrame. This is a list of lists of
        strings or unicode. If it helps, it can be thought of as a matrix of
        strings instead.

    header : int or None
        An integer indicating the row to use for the column header or None
        indicating no header will be used.

    index_col : int or None
        An integer indicating the column to use for the index or None
        indicating no column will be used.

    infer_types : bool
        Whether to convert numbers and dates.

    skiprows : collections.Container or int or slice
        Iterable used to skip rows.

    df : DataFrame
        A DataFrame containing the data from `data`

        * If `skiprows` is not found in the rows of the parsed DataFrame.

        * If `skiprows` is not found in the rows of the parsed DataFrame.

    See Also

    The `data` parameter is guaranteed not to be a list of empty lists.
    thead, tbody, tfoot = data
    columns = thead or None
    df = DataFrame(tbody, columns=columns)

    if skiprows is not None:
        it = _get_skiprows_iter(skiprows)

            df = df.drop(it)
        except ValueError:
            raise ValueError('Labels {0} not found when trying to skip'
                             ' rows'.format(it))

    # convert to numbers/dates where possible
    # must be sequential since dates trump numbers if both args are given
    if infer_types:
        df = df.convert_objects(convert_numeric=True)
        df = df.convert_objects(convert_dates='coerce')

    if header is not None:
        header_rows = df.iloc[header]

        if header_rows.ndim == 2:
            names = header_rows.index
            df.columns = MultiIndex.from_arrays(header_rows.values,
            df.columns = header_rows

        df = df.drop(df.index[header])

    if index_col is not None:
        cols = df.columns[index_col]

            cols = cols.tolist()
        except AttributeError:

        # drop by default
        df.set_index(cols, inplace=True)
        if df.index.nlevels == 1:
            if isnull( or not
       = None
            names = [name or None for name in df.index.names]
            df.index = MultiIndex.from_tuples(df.index.values, names=names)

    return df
def Get_dataframe(Symbol):
    """Load every document of a symbol's history collection into a DataFrame.

    The collection is named ``<Symbol>_history`` and is read from the
    ``stox`` database of a default-configured ``MongoClient``. Object
    columns are coerced to numeric before the frame is returned.
    """
    collection_name = ''.join((str(Symbol), '_history'))
    history = MongoClient()['stox'][collection_name]
    documents = list(history.find())
    frame = DataFrame(documents)
    return frame.convert_objects(convert_numeric=True)
    # for table_set in p.tables:
    #     for table in table_set[1:]:
    #         scoring_probability = float(table[7]) * float(table[-1])
    #         if scoring_probability > 15.0:
    #             a = '%s %f | %s %f' % (table[1], scoring_probability, table[1], scoring_probability)
    #             print(a)

    with open('table_example.html', encoding='utf-8') as file_:
        s =
        p = HTMLTableParser()
        table_set = array([table[1:] for table_group in p.tables for table in table_group])
        df = DataFrame(table_set[1:], columns=table_set[0])
        df = df.convert_objects(convert_numeric=True)
        df['ZS'] = 9
        for team, indexes in df.groupby('Tým').groups.items():
            team_data_set = df.loc[indexes, ['Z', 'ZS']]
            print((team_data_set['ZS'] / team_data_set['Z']).sum())
    #     # for team in df.get('Tým').unique():
    #     #     print(team)
    #     multiplication_result = df.get('S/Z') * df.get('RÚS')
    #     result_df = DataFrame({'Name': df.get('Jméno'), 'Team': df.get('Tým'), 'Probability':multiplication_result})
    #     for team, indexes in result_df.groupby('Team').groups.items():
    #         print(result_df.loc[indexes, ['Name', 'Probability']].set_index('Name').to_dict()['Probability'])
        # for table_set in p.tables:
        #     for table in table_set[1:]:
        #         scoring_probability = float(table[7]) * float(table[-1])
        #         if scoring_probability > 50.0:
df['resp']=1 #adds new column. 

#add the next trial
thisTrial =
df= df.append( thisTrial, ignore_index=True ) #ignore because I got no index

#add the next trial
thisTrial =
df= df.append( thisTrial, ignore_index=True )

# Use pandas to compute the mean response at each tilt level.
# In the author's runs df.dtypes reported "object" (generic Python objects) for
# these columns, and a mean cannot be taken over them, so coerce to numeric first.
df = df.convert_objects(convert_numeric=True) # object dtype -> numeric where possible

#print('df='); print(df) #debug
grouped = df.groupby('tilt')
groupMeans= grouped.mean() # DataFrame of per-group means; its index holds the distinct 'tilt' values
tiltsTested = list(groupMeans.index)
pResp = list(groupMeans['resp'])  # mean of 'resp' at each tilt
# NOTE(review): sum() adds up the 'resp' values per tilt; it equals the number of
# trials only if every 'resp' is 1. If a trial count is wanted for scaling the
# data points, count() may be what was intended -- confirm.
ns = grouped.sum() # intended as n per tilt, used to scale data point size
ns = list(ns['resp'])
print('df mean at each tilt\n'); print(  DataFrame({'tilt': tiltsTested, 'pResp': pResp, 'n': ns })   )
#data point sizes. One entry in array for each datapoint
#def plotDataAndPsychometricCurve(staircase,fit,descendingPsycho,threshVal):