Example #1
    def get_data_for_float(self, dac_url, only_file=None, 
            surface_values_only=False):
        '''Given a dac_url return a list of hashes of data for each 
        profile for the float specified in dac_url.  Example dac_url
        for float 1900722:

        http://tds0.ifremer.fr/thredds/catalog/CORIOLIS-ARGO-GDAC-OBSaoml/1900722/profiles/catalog.xml
        '''
        pd = []
        for profile_url in sorted(self.get_profile_opendap_urls(dac_url)):
            if only_file:
                if not profile_url.endswith(only_file):
                    continue

            float = profile_url.split('/')[7]
            prof = str(profile_url.split('/')[-1].split('.')[0].split('_')[1])
            self.logger.info('Reading data from ' + profile_url[:20] + '...' +
                       profile_url[-50:])
            try:
                d = self.get_profile_data(profile_url, 
                        surface_values_only=surface_values_only)
                pd.append({float: {prof: d}})
            except RequiredVariableNotPresent as e:
                self.logger.warn(e)
            except OpenDAPServerError as e:
                self.logger.warn(e)

        return pd
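A minimal usage sketch for the method above; the owning class and its construction are not shown in the example, so the loader object here is a placeholder, and only the shape of the return value follows the docstring (one dict per profile, keyed by float id and profile number).

# Sketch only: `loader` stands in for an instance of the (unshown) owning class.
profiles = loader.get_data_for_float(dac_url, surface_values_only=True)
for entry in profiles:                      # [{float_id: {profile_no: data}}, ...]
    for float_id, by_profile in entry.items():
        for prof, data in by_profile.items():
            print(float_id, prof)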
Example #2
def get_values(lar):
    st = []
    pd = []
    lar = str(lar)
    for part in lar.split(' '):

        if "kernel_size" in part[0:12]:
            ker = part[-2]
        if "MaxPool2d" in part:
            ker = part[-2]

        if 'AdaptiveAvgPool2d' in part:
            ker = [i[-2] for i in part.split(' ') if 'output_size' in i][0]
            s = 1

        if 'stride' in part[0:7]:
            s = part[-2]

        if 'padding' in part:
            pd.append(int(part[-2]))
        else:
            pd.append(0)

    p = max(pd)
    return int(ker), int(s), int(p)
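The function above recovers kernel_size, stride and padding by parsing a layer's printed representation; if the argument is a torch.nn module, as the parsed strings suggest (an assumption, since the example does not show the caller), the same values are exposed directly as attributes:

import torch.nn as nn

layer = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
# Constructor arguments are stored on the module, so no string parsing is needed.
k, s, p = layer.kernel_size, layer.stride, layer.padding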
Example #3
def calc_embs(filepaths, batch_size=64):
    pd = []
    for start in tqdm(range(0, len(filepaths), batch_size)):
        aligned_images = load_and_align_images(filepaths[start:start +
                                                         batch_size])
        pd.append(nn4_small2.predict_on_batch(np.squeeze(aligned_images)))
    embs = np.array(pd)

    return np.array(embs)
Example #4
def calc_embs(file_paths, batch_size=64):
    pd = []
    for start in tqdm(range(0, len(file_paths), batch_size)):
        aligned_images = load_and_align_images(file_paths[start:start +
                                                          batch_size])
        pd.append(model.predict_on_batch(np.squeeze(aligned_images)))
    embs = l2_normalize(np.concatenate(pd))
    # embs = np.array(pd)
    return np.array(embs)
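The commented-out np.array(pd) line points at the difference between the two reductions: np.concatenate stacks the per-batch predictions into a single (n_images, emb_dim) array, whereas np.array keeps a leading batch axis and breaks when the final batch is smaller. A small sketch:

import numpy as np

batch_a = np.zeros((64, 128))   # a full batch of embeddings
batch_b = np.zeros((7, 128))    # a smaller final batch

np.concatenate([batch_a, batch_b]).shape   # (71, 128): one row per image
# np.array([batch_a, batch_b]) would give a ragged object array (or raise on
# recent NumPy) because the batch sizes differ.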
Example #5
def ProbDis(vector):  #Define Probability distribution
    pd = []
    m, n = vector.shape
    for i in range(m):
        v = 0
        for j in range(n):
            v = v + (vector[i, j]**2)
        pd.append(v / n)
    return pd
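ProbDis returns, for each row, the mean of its squared entries; a vectorized NumPy sketch of the same computation:

import numpy as np

def prob_dis_vectorized(vector):
    # Row-wise mean of squared entries, matching ProbDis above.
    return (np.asarray(vector) ** 2).mean(axis=1).tolist()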
Example #6
def calc_embs(filepaths, margin=10, batch_size=10):
    pd = []
    for start in tqdm(range(0, len(filepaths), batch_size)):
        aligned_images = prewhiten(
            load_and_align_images(filepaths[start:start + batch_size], margin))
        pd.append(model.predict_on_batch(aligned_images))
    embs = l2_normalize(np.concatenate(pd))

    return embs
Example #7
def calc_emb_test(faces):
    pd = []
    aligned_faces = align_faces(faces)
    if (len(faces) == 1):
        pd.append(nn4_small2.predict_on_batch(aligned_faces))
    elif (len(faces) > 1):
        pd.append(nn4_small2.predict_on_batch(np.squeeze(aligned_faces)))
    embs = np.array(pd)
    return np.array(embs)
Example #8
    def step_by_step_static_path_density(self, ende=None):
        """ Returns list. [index=Aggregation depth: static path density]
        """
        pd = []
        for i, Cn in enumerate(self.step_by_step_aggregation(ende)):
            print('Static path density. Step', i)
            pd.append(self.path_density_of_A(Cn))

        return pd
Example #9
def calc_emb_test(faces):
    pd = []
    aligned_faces = align_faces(faces)
    if len(faces) == 1:
        pd.append(model.predict_on_batch(aligned_faces))
    elif len(faces) > 1:
        pd.append(model.predict_on_batch(np.squeeze(aligned_faces)))
    embs = l2_normalize(np.concatenate(pd))
    # embs = np.array(pd)
    return np.array(embs)
Example #10
def find_closest_nodes(matrix):
    nodes = list(matrix.columns)
    pd = []
    # find minimal distance in matrix
    for n1 in nodes:
        for n2 in nodes:
            if n1==n2:
                continue
            pd.append((matrix[n1][n2], n1, n2))
    pd.sort()
    return pd[0]
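Assuming the distance matrix is symmetric, the same closest pair can be found without building and sorting every ordered pair; a sketch using min over unordered pairs:

from itertools import combinations

def find_closest_nodes_min(matrix):
    # Visit each unordered node pair once and keep the smallest distance.
    nodes = list(matrix.columns)
    return min((matrix[n1][n2], n1, n2) for n1, n2 in combinations(nodes, 2))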
Example #11
def calc_emb_test(faces):
    pd = []
    aligned_faces = align_faces(faces)
    if(len(faces)==1):
        pd.append(nn4_small2.predict_on_batch(aligned_faces))
    elif(len(faces)>1):
        pd.append(nn4_small2.predict_on_batch(np.squeeze(aligned_faces)))
    #embs = l2_normalize(np.concatenate(pd))
    embs = np.array(pd)
    #print(embs)
    return np.array(embs)
Example #12
def calc_embs(filepaths, margin=10, batch_size=1):
    aligned_images = prewhiten(load_and_align_images(filepaths, margin))
    pd = []
    current = 0
    for start in range(0, len(aligned_images), batch_size):
        total = len(aligned_images)
        if current % 5 == 0:
            print('{} / {}'.format(current, total))
        pd.append(
            model.predict_on_batch(aligned_images[start:start + batch_size]))
        current += 1
    embs = l2_normalize(np.concatenate(pd))

    return embs
Example #13
def arima(df,time_id,lookback, p,d,q):
    Log(LOG_INFO) << "Computing arima(%d,%d,%d) with lookback: %d " % (p,d,q,lookback)
    pd=[]
    for tid in time_id:
        # pdb.set_trace()
        series = np.log(df[OPEN_KEY][tid-lookback:tid].values)
        model = ARIMA(series,order=(p,d,q))
        model_fit = model.fit(method_kwargs={"warn_convergence": False})
        output = model_fit.forecast()
        p0 = np.log(df[OPEN_KEY][tid])
        err = (output[0]-p0)/p0
        pd.append(err)

    pd = np.array(pd)
    return pd.reshape(-1,1)
Example #14
    def s(self, q, pars=None):

        if not hasattr(self, '_atomic_formfactors'):
            self._atomic_formfactors = formFactor(q, self.Z)

        if pars is None:
            pars = self.par0
        else:
            # print(pars)
            # print(self.par0.keys())
            assert all([key in pars.keys() for key in self.par0.keys()]), \
                'the input parameter dict does not contain all necessary parameter keys'

        if self.reparameterized:
            pars = self.convert(pars)

        if not self.dispersed:
            self.transform(pars)
            return Debye(q, self, f=self._atomic_formfactors)

        else:
            pd = []
            wd = []
            for t in self._associated_transformation:
                if t.dw:
                    _p, _w = t.dw.disperse(pars, t.name)
                else:
                    _p, _w = pars[t.name], 1
                pd.append(_p)
                wd.append(_w)

            pd_grid = [i.ravel() for i in np.meshgrid(*pd)]
            wd_grid = [i.ravel() for i in np.meshgrid(*wd)]

            n = len(pd_grid[0])  # number of combinations
            # _bla = 0
            _s = np.zeros(q.shape)
            for i in range(n):
                _p_dict = {}
                _w = 1
                for j, key in enumerate(self._t_keys):
                    _p_dict[key] = pd_grid[j][i]
                    _w *= wd_grid[j][i]
                self.transform(_p_dict)
                _s += _w * Debye(q, self, f=self._atomic_formfactors)

            return _s
Example #15
    def test_compare_with_closed_form(self):
        """Test that compares the computed with the analytical CRPS."""

        pd_single = norm(0, 1)
        pd = []
        for i in range(0, 3):
            pd.append(pd_single)
        meas = [-1, 0, 1]

        mean_crps, single_crps = crps(pd, meas)

        def crps_closed_form(pd, meas):
            return meas * (2 * pd.cdf(meas) - 1) + 2 * pd.pdf(meas) - 1 / np.sqrt(np.pi)

        crps_analytical = list(map(crps_closed_form, pd, meas))

        is_good = np.isclose(np.array(single_crps), np.array(crps_analytical)).all()
        assert_true(is_good, msg="Computed CRPS is not equal to analytical CRPS.")
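crps_closed_form is the standard-normal special case of the closed-form CRPS of a Gaussian forecast; a sketch of the general Normal(mu, sigma) version, which reduces to the helper above for mu=0, sigma=1:

import numpy as np
from scipy.stats import norm

def crps_normal(mu, sigma, y):
    # Closed-form CRPS of a Normal(mu, sigma) forecast evaluated at y.
    z = (y - mu) / sigma
    return sigma * (z * (2 * norm.cdf(z) - 1) + 2 * norm.pdf(z) - 1 / np.sqrt(np.pi))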
Example #16
    def test_compare_different_expectations(self):
        """Test that compares same distance between meas and pd."""

        pd_single = norm(0, 1)
        pd = []
        for i in range(0, 3):
            pd.append(pd_single)
        meas = [-1, 0, 1]
        mean_crps1, single_crps1 = crps(pd, meas)

        pd2 = []
        for i in range(0, 3):
            pd2.append(norm(i, 1))
        meas2 = [-1, 1, 3]

        mean_crps2, single_crps2 = crps(pd2, meas2)

        is_good = np.equal(single_crps1, single_crps2).all()
        assert_true(is_good, msg="Relation of individual CPRS values should return same value.")
Example #17
    def test_compare_different_expectations(self):
        """Test that compares same distance between meas and pd."""

        pd_single = norm(0, 1)
        pd = []
        for i in range(0, 3):
            pd.append(pd_single)
        meas = [-1, 0, 1]
        meanCRIGN1, singleCRIGN1 = crign.crign(pd, meas)

        pd2 = []
        for i in range(0, 3):
            pd2.append(norm(i, 1))
        meas2 = [-1, 1, 3]

        meanCRIGN2, singleCRIGN2 = crign.crign(pd2, meas2)

        is_good = np.isclose(singleCRIGN1, singleCRIGN2).all()
        assert_true(
            is_good,
            msg=
            "Relation of individual CRIGN values should return roughly the same value."
        )
Example #18
def getClusterCenters(data, k):
    rows, cols = data.shape
    centers = np.zeros((k, cols))
    r = np.random.choice(rows, 1)  #Initial Choice
    centers[0, :] = data[r, :]
    k_center = 1
    while k_center < k:
        pd = []  #pd represents probability distribution
        dist_sum = 0
        for i in range(rows):
            max_dist = 1000
            for j in range(k_center):
                dist = np.linalg.norm(data[i] - centers[j])**2
                if dist < max_dist:
                    max_dist = dist
            d = max_dist * max_dist
            dist_sum += d
            pd.append(d)
        for i in range(rows):
            pd[i] = pd[i] / dist_sum
        r = np.random.choice(rows, 1, p=pd)
        centers[k_center, :] = data[r, :]
        k_center = k_center + 1
    return centers
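The inner loops compute, for each point, the squared distance to its nearest already-chosen center and normalize those distances into sampling weights (k-means++-style seeding; note the loop above additionally multiplies the squared distance by itself, which the standard weighting below does not). A vectorized sketch:

import numpy as np

def seeding_probabilities(data, centers):
    # Squared distance from every point to its nearest chosen center,
    # normalized into a probability vector for np.random.choice.
    d2 = ((data[:, None, :] - centers[None, :, :]) ** 2).sum(axis=-1).min(axis=1)
    return d2 / d2.sum()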
Example #19
def map_to_chart(candle, pd, stock):
    temp = DailyData.chart(candle['date'], stock, candle['open'],
                           candle['high'], candle['low'], candle['close'],
                           candle['volume'], "NOT AVAILABLE",
                           "NOT AVAILABLE", "NOT AVAILABLE")
    pd = pd.append(
        {
            'date': temp.get_date(),
            'symbol': stock,
            'open': temp.get_open(),
            'high': temp.get_high(),
            'low': temp.get_low(),
            'close': temp.get_close(),
            'volume': temp.get_volume(),
            'change': temp.get_change(),
            'changePercent': temp.get_changePercent(),
            'vwap': temp.get_vwap()
        },
        ignore_index=True)
    return pd
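DataFrame.append, used above with ignore_index=True, was deprecated in pandas 1.4 and removed in 2.0; a standalone sketch of the same single-row append with pd.concat (the values are made up, and note that inside map_to_chart the name pd is the DataFrame argument rather than the pandas module):

import pandas as pd

frame = pd.DataFrame(columns=['date', 'symbol', 'open', 'close'])
row = {'date': '2020-01-02', 'symbol': 'XYZ', 'open': 1.0, 'close': 1.1}  # hypothetical values
frame = pd.concat([frame, pd.DataFrame([row])], ignore_index=True)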
Example #20
def get_pm25_df(html_path):
    with open(html_path, encoding="utf-8") as html_file:
        soup = BeautifulSoup(html_file, "html.parser")
    pm25_df = pd.DataFrame(
        columns=["hour", "pm25_value_max", "pm25_value_min"])
    container_tag = soup.find_all("div", {"class": "whitebody"})[0]
    parent_tag = container_tag.center.div.find("div", {
        "class": "forecast-body"
    }).find("div", {
        "class": "forecast-body-table"
    }).table
    list_of_pm25 = parent_tag.find("tr", {
        "class": "wf-row-pm25"
    }).find_all("td")[1:]
    hour = 0
    for each_point in list_of_pm25:
        try:
            this_hour = hour
            max_pm25 = int(
                each_point.div.find("div", {
                    "class": "wf-cell-aqi-val-max"
                }).text)
            min_pm25 = int(
                each_point.div.find("div", {
                    "class": "wf-cell-aqi-val-min"
                }).text)
            pm25_df = pm25_df.append(
                {
                    "hour": this_hour,
                    "pm25_value_max": max_pm25,
                    "pm25_value_min": min_pm25
                },
                ignore_index=True)
            hour += 3
            hour = hour % 24
        except:
            print("this one dose not contain info")
    return pm25_df
Example #21
def condensematrix(dm, pd, names, key, hrf='canonical', op='mult'):
    # returns condition with probe removed
    import copy as cp
    import numpy as np
    delays = None
    if hrf == 'fir':
        delays = []
        for i in dm.names:
            if i == 'constant':
                delays.append('-1')
            else:
                delays.append(i.split('_')[i.split('_').index('delay') + 1])
        delays = np.array(delays, dtype=int)

    if op == 'stack':
        for i in dm.names:
            if (i != 'constant'):
                if (i.split('_')[i.split('_').index(key) + 1] != '0'):
                    pd.append(dm.matrix[:, dm.names.index(i)])
                    names.append(i.replace('glm_label_', ''))
    else:
        idx = []
        for i in dm.names:
            if i == 'constant':
                idx.append('0')
            else:
                idx.append(i.split('_')[i.split('_').index(key)+1])
        idx = np.array(idx, dtype=float)

        if delays is not None:
            for d in np.arange(np.max(delays)+1):
                outkey = key + '_delay_' + str(d)
                outidx = idx[delays == d]
                pd.append(np.dot(dm.matrix[:, delays==d], outidx))
                names.append(outkey)
        else:
            pd.append(np.dot(dm.matrix, idx))
            names.append(key)
Example #22
def condensematrix(dm, pd, names, key, hrf='canonical', op='mult'):
    # returns condition with probe removed
    import copy as cp
    import numpy as np
    delays = None
    if hrf == 'fir':
        delays = []
        for i in dm.names:
            if i == 'constant':
                delays.append('-1')
            else:
                delays.append(i.split('_')[i.split('_').index('delay') + 1])
        delays = np.array(delays, dtype=int)

    if op == 'stack':
        for i in dm.names:
            if (i != 'constant'):
                if (i.split('_')[i.split('_').index(key) + 1] != '0'):
                    pd.append(dm.matrix[:, dm.names.index(i)])
                    names.append(i.replace('glm_label_', ''))
    else:
        idx = []
        for i in dm.names:
            if i == 'constant':
                idx.append('0')
            else:
                idx.append(i.split('_')[i.split('_').index(key) + 1])
        idx = np.array(idx, dtype=float)

        if delays is not None:
            for d in np.arange(np.max(delays) + 1):
                outkey = key + '_delay_' + str(d)
                outidx = idx[delays == d]
                pd.append(np.dot(dm.matrix[:, delays == d], outidx))
                names.append(outkey)
        else:
            pd.append(np.dot(dm.matrix, idx))
            names.append(key)
Example #23
type(df.ix[['a','b'], 'Two'])
type(df.ix[['a','b'],:])
df.ix[['a','b'],:]
df.ix[['a','b'],]
df.plot()
matplotlib.show()
matplotlib.Show()
import matplotlib as mpl
df.show()
df1 = pd.DataFrame(np.random.randn(6,3), columns=['A','B','C'])
df2 = pd.DataFrame(np.random.randn(6,3), columns=['D','E','F'])
df3 = df1.copy()
df1
df = pd.concat([df1, df2])
df
df = pd.append([df1, df2])
help(pd.concat)
df = pd.append([df1, df2], join='Inner')
df = pd.concat([df1, df2], join='Inner')
df = pd.concat([df1, df2], join=inner)
df = pd.concat([df1, df2], join='inner')
df
df = pd.concat([df1, df2], join_axes='inner')
df = df1.append(df2)
df
df = pd.concat([df1, df2], axes=1)
df = df1.append(df2, axis=1)
df = df1.append(df2, ignore_index=1)
df
df = df1.append(df2, ignore_index=0)
df
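For reference, pandas has no module-level pd.append; in the session above the calls that actually work are pd.concat and DataFrame.append (the latter has since been removed in pandas 2.0). A condensed sketch of the working variants:

import numpy as np
import pandas as pd

df1 = pd.DataFrame(np.random.randn(6, 3), columns=['A', 'B', 'C'])
df2 = pd.DataFrame(np.random.randn(6, 3), columns=['D', 'E', 'F'])

pd.concat([df1, df2])                # stack rows, union of columns (NaN-filled)
pd.concat([df1, df2], join='inner')  # keep only columns common to both frames
pd.concat([df1, df2], axis=1)        # place the frames side by side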
Example #24
baseUrl='https://movie.douban.com/top250?start='
headers = {
     'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.56'

}
movieDataList = []
for j in range(0,2):
    star=j*25
    url=baseUrl+str(star)
    req = Request(url=url, headers=headers, method="GET")
    response = urlopen(req).read().decode("utf-8")

    soup = BeautifulSoup(response, 'html.parser')
    movielist = soup.find_all("div", class_="item")
    for i in range(0, 25):
        moviename = re.search(r'<span class="title">(.*?)</span>', str(movielist[i]))
        movevedio = movielist[i].select(".info > .hd > a")[0]
        vedioname = re.search(r'<a class="" href="(.*?)">', str(movevedio))
        movieimg = movielist[i].select(".pic>a>img")
        img = re.search('src="(.*?)"', str(movieimg))
        movedaoyan = movielist[i].select(".info>.bd>p")[0]
        daoyan = re.search('<p class="">([\s\S]*?)</p>', str(movedaoyan))
        jianjie = daoyan.group(1).strip().replace(" ", '').replace("\n", '').replace("<br/>", '').replace('/', '')
        moviecomment = movielist[i].select(".star>.rating_num")[0].get_text()
        jieshao = movielist[i].select('.quote>.inq')[0].get_text()
        movedata = (moviename.group(1), vedioname.group(1), img.group(1), jianjie, moviecomment, jieshao)
        # print(movedata)
        movieDataList.append(movedata)

print(movieDataList)
print(len(movieDataList))
Example #25
prob_df = pd.DataFrame()
prob_df['label'] = y_test
prob_df['prob_down'] = prob_down

global quantile
quantile = np.percentile(prob_down,list(list(range(0,101,10))))

prob_df['group'] = prob_df['prob_down'].apply(group_prob)

pd = []
td = []
for i in range(1,11):
    group_df = prob_df[prob_df['group']==i]
    true_down_prob = stats.itemfreq(group_df['label'])[0][1]/group_df.shape[0]
    print('group:',i,'predict_prob:',group_df['prob_down'].median(),'true_prob:',true_down_prob)
    pd.append(group_df['prob_down'].median())
    td.append(true_down_prob)

print('mse',mean_squared_error(pd,td))
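scipy.stats.itemfreq, used above to count the first label value in each group, has since been removed from SciPy; an equivalent count with NumPy (a sketch reusing the group_df from the loop):

import numpy as np

labels, counts = np.unique(group_df['label'], return_counts=True)
true_down_prob = counts[0] / group_df.shape[0]   # frequency of the smallest label value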



############ Lasso model ###############


from sklearn import linear_model
from sklearn.metrics import r2_score

df = pd.read_csv('/Users/vikasnatesh/Downloads/labeled_data_10s.csv')
# Train Test split (80% train, 20% test)
Example #26
def make_parammat(dm, hrf='canonical', zscore=False):
    # remove anything with a 0 and include probe as a feature
    # assuming dm is a dict
    import numpy as np
    out = dm[dm.keys()[0]]
    pd = []
    names = []
    for key in dm.keys():
        if key == 'motion':
            names.append('motion_0')
            pd.append(np.dot(dm[key].matrix, np.array([1, 0, 0, 0, 0, 0, 0])))
            names.append('motion_1')
            pd.append(np.dot(dm[key].matrix, np.array([0, 1, 0, 0, 0, 0, 0])))
            names.append('motion_2')
            pd.append(np.dot(dm[key].matrix, np.array([0, 0, 1, 0, 0, 0, 0])))
            names.append('motion_3')
            pd.append(np.dot(dm[key].matrix, np.array([0, 0, 0, 1, 0, 0, 0])))
            names.append('motion_4')
            pd.append(np.dot(dm[key].matrix, np.array([0, 0, 0, 0, 1, 0, 0])))
            names.append('motion_5')
            pd.append(np.dot(dm[key].matrix, np.array([0, 0, 0, 0, 0, 1, 0])))
        # hardcode stim and verb
        elif key == 'stim' or key == 'verb' or key == 'anim':
            condensematrix(dm[key], pd, names, key, hrf, op='stack')
        else:
            condensematrix(dm[key], pd, names, key, hrf, op='mult')
    # don't need constant because normalized data
    # pd.append(np.ones(np.shape(pd[-1])))
    # names.append('constant')
    if zscore == True:
        out.matrix = zs(np.array(pd).T)
    else:
        out.matrix = (np.array(pd).T)
    out.names = names
    return out
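The six np.dot calls in the motion branch each multiply the motion design matrix by a unit vector, which simply selects one of its first six columns (assuming seven columns, as those vectors imply); an equivalent, shorter form of that branch as a sketch:

for c in range(6):
    # np.dot(matrix, e_c) is just column c of the matrix.
    names.append('motion_%d' % c)
    pd.append(dm['motion'].matrix[:, c])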
Example #27
# winter

winter = joindf[(joindf.index.month == 12) | (joindf.index.month == 1) |
                (joindf.index.month == 2)]

spring = joindf[(joindf.index.month > 2) & (joindf.index.month < 6)]
summer = joindf[(joindf.index.month > 5) & (joindf.index.month < 9)]
autumn = joindf[(joindf.index.month > 8) & (joindf.index.month < 12)]

correlation = {
    'annual': joindf.corr().iloc[0, 1],
    'winter': winter.corr().iloc[0, 1],
    'spring': spring.corr().iloc[0, 1],
    'summer': summer.corr().iloc[0, 1],
    'autumn': autumn.corr().iloc[0, 1]
}

# works with xarray but not with pandas...
nstorm_monthly_mean = nstorms.groupby(by=nstorms.index.month).mean()

for i in np.arange(1, 13, 1):
    df = nstorms[nstorms.index.month == i] - nstorm_monthly_mean.year[i]
    df.columns = ['norm']

    result = pd.append([nstorms, df], axis=1, join='outer')

nstorms_norm = nstorms_norm.reset_index(drop=True)
indices_crop = indices_crop.reset_index(drop=True)
correlation = indices_crop.corr(nstorms_norm.values, method='pearson')
plt.figure()
Example #28
def make_parammat(dm, hrf='canonical', zscore=False):
    # remove anything with a 0 and include probe as a feature
    # assuming dm is a dict
    import numpy as np
    out = dm[dm.keys()[0]]
    pd = []
    names = []
    for key in dm.keys():
        if key == 'motion':
            names.append('motion_0')
            pd.append(np.dot(dm[key].matrix, np.array([1, 0, 0, 0, 0, 0, 0])))
            names.append('motion_1')
            pd.append(np.dot(dm[key].matrix, np.array([0, 1, 0, 0, 0, 0, 0])))
            names.append('motion_2')
            pd.append(np.dot(dm[key].matrix, np.array([0, 0, 1, 0, 0, 0, 0])))
            names.append('motion_3')
            pd.append(np.dot(dm[key].matrix, np.array([0, 0, 0, 1, 0, 0, 0])))
            names.append('motion_4')
            pd.append(np.dot(dm[key].matrix, np.array([0, 0, 0, 0, 1, 0, 0])))
            names.append('motion_5')
            pd.append(np.dot(dm[key].matrix, np.array([0, 0, 0, 0, 0, 1, 0])))
        # hardcode stim and verb
        elif key == 'stim' or key == 'verb' or key == 'anim':
            condensematrix(dm[key], pd, names, key, hrf, op='stack')
        else:
            condensematrix(dm[key], pd, names, key, hrf, op='mult')
    # don't need constant because normalized data
    # pd.append(np.ones(np.shape(pd[-1])))
    # names.append('constant')
    if zscore==True:
        out.matrix = zs(np.array(pd).T)
    else:
        out.matrix = (np.array(pd).T)
    out.names = names
    return out
Example #29
from scraper import *
import pandas as pd

link = ['https://www.ah.nl/producten/product/wi48405/ah-basic-havermout', 'https://www.ah.nl/producten/product/wi383520/ah-amandel-drink-ongezoet']
name = ['oats', 'almond milk'] 
portion = [60, 200]

for idx in range(len(link)):
    get_nutrients(link[idx], name[idx])

foods = pd.append(get_nutrients(link, name), foods)
foods.to_csv('foods.csv')

#meal 1 oats
# meal 2 chicken and veg


# introduce class mealplan, subclasses ingredients/food, contains targets, ingredients, stock
Example #30
import gc


def fix_time(df, time_cols):
    for time_col in time_cols:
        df[time_col] = pd.to_datetime(df[time_col],
                                      errors='coerce',
                                      format='%Y%m%d')
    return df


gc.enable()

df_train = pd.read_csv('~/train.csv')
df_train = pd.concat((df_train, pd.read_csv('~/train_v2.csv')),
                     axis=0,
                     ignore_index=True).reset_index(drop=True)
df_test = pd.read_csv('~/sample_submission_v2.csv')

df_members = pd.read_csv('~/members_v3.csv')

df_transactions = pd.read_csv('~/transactions.csv')
df_transactions = pd.concat(
    (df_transactions, pd.read_csv('~/transactions_v2.csv')),
    axis=0,
    ignore_index=True).reset_index(drop=True)
df_transactions = df_transactions.sort_values(
    by=['transaction_date'], ascending=[False]).reset_index(drop=True)
df_transactions = df_transactions.drop_duplicates(subset=['msno'],
                                                  keep='first')
Example #31
import csv
import pandas as pd
file1 = ("C:/Users/Krithi/Desktop/Python/bright_star.csv")
file2 = ("C:/Users/Krithi/Desktop/Python/convertedStars.csv")
d1 = []
d2 = []
with open(file1, "r", encoding="utf8") as f:
    csvreader = csv.reader(f)
    for i in csvreader:
        d1.append(i)
with open(file2, "r", encoding="utf8") as f:
    csvreader = csv.reader(f)
    for i in csvreader:
        d2.append(i)
h1 = d1[0]
h2 = d2[0]
pd1 = d1[1:]
pd2 = d2[1:]
h = h1 + h2
pd = []
for i in pd1:
    pd.append(i)
for j in pd2:
    pd.append(j)
with open("totalStars.csv", "w", encoding='utf8') as f:
    csvwriter = csv.writer(f)
    csvwriter.writerow(h)
    csvwriter.writerows(pd)
#df = pd.read_csv("totalStars.csv")
#df.tail(8)
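The commented-out lines would fail as written because the row list above is also named pd, shadowing the pandas import; a sketch of the tail of the script with a non-conflicting name:

rows = d1[1:] + d2[1:]               # combined data rows; the pandas alias `pd` stays intact
with open("totalStars.csv", "w", encoding="utf8", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(h1 + h2)
    writer.writerows(rows)
df = pd.read_csv("totalStars.csv")
df.tail(8)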
Example #32
for i in range(1, number_of_steps):
    if label == 'mlp' or label == 'cnn' or label == 'lstm':
        model.set_weights(init_weights)

    if i * data_batch > X_train.shape[0] - 1:
        # if label == 'mlp' or label == 'cnn' or label == 'lstm':
        history = model.fit(X_train[:X_train.shape[0] - 1, :], y_train[:y_train.shape[0] - 1], epochs=100,
                            validation_data=(X_test, y_test),
                            callbacks=[early_stopping], workers=8)
        # else:
        #     history = model.fit(X_train[:X_train.shape[0] - 1, :], y_train[:y_train.shape[0] - 1])  # validation_data=(X_test, y_test), workers=8)

        predictions['size_' + str(i * data_batch)] = model.predict(X_test)

        temp = pd.DataFrame(history.history)
        stats = stats.append(temp.iloc[temp.shape[0] - 1, :])
        break

    # if label == 'mlp' or label == 'cnn' or label == 'lstm':
    history = model.fit(X_train[:i * data_batch], y_train[:i * data_batch], epochs=100,
                        validation_data=(X_test, y_test),
                        callbacks=[early_stopping], workers=8)
    # else:
    #     history = model.fit(X_train[:i * data_batch], y_train[:i * data_batch])  # ,validation_data=(X_test, y_test), workers=8)

    predictions['size_' + str(i)] = np.argmax(model.predict(X_test))#, axis=1)

    temp = pd.DataFrame(history.history)
    stats = stats.append(temp.iloc[temp.shape[0] - 1, :])

# Log of models data