Example #1
0
def worker(tsobj):
    """Featurize a single time-series object.

    Forwards ``tsobj`` to ``featurize.featurize_single_ts`` together with the
    module-level ``features_to_use`` selection and the custom feature
    functions from ``feature_sets.custom_features``.  ``raise_exceptions`` is
    passed as ``False``.

    Returns whatever ``featurize.featurize_single_ts`` returns.
    """
    # ``features_to_use`` is only read here, so it resolves to the module
    # global without needing a ``global`` declaration.
    compute_features = featurize.featurize_single_ts
    options = dict(
        features_to_use=features_to_use,
        custom_functions=feature_sets.custom_features,
        raise_exceptions=False,
    )
    return compute_features(tsobj, **options)
def avg_double_to_single_step(df):
    """Compute the ``avg_double_to_single_step`` feature for ``df``.

    ``df`` is grouped by its ``passband`` column; the per-group ``mjd``,
    ``flux`` and ``flux_err`` values are handed to ``time_series.TimeSeries``
    as ``t``, ``m`` and ``e`` respectively.  The single requested feature is
    then computed with ``featurize.featurize_single_ts`` and returned as a
    ``pd.Series`` built from that feature's ``to_dict()`` mapping.

    Feature reference: http://cesium-ml.org/docs/feature_table.html
    """
    feature_name = 'avg_double_to_single_step'
    per_band = df.groupby('passband')

    def _values_by_band(column):
        # One array of raw values per passband group.
        return per_band[column].apply(lambda grp: grp.values)

    series = time_series.TimeSeries(
        t=_values_by_band('mjd'),
        m=_values_by_band('flux'),
        e=_values_by_band('flux_err'),
    )

    computed = featurize.featurize_single_ts(
        series, features_to_use=[feature_name])
    return pd.Series(computed[feature_name].to_dict())
Example #3
0
# Offsets into the already-open file `f` at which each object's rows begin.
# NOTE(review): the trailing `...` looks like a placeholder for the remaining
# offsets; it must be replaced with real integers before this loop can run.
positions = [1, 12076, 24634, ...]

# Store passband names so the features are labelled accurately.
pbnames = ['u', 'g', 'r', 'i', 'z', 'Y']

# Loop through each object.
for i, start in enumerate(positions):
    # Seek to the object's starting position.
    f.seek(start)
    # Read up to the start of the subsequent object.  The last entry has no
    # successor, so read to end-of-file instead of indexing past the list.
    if i + 1 < len(positions):
        chunk = f.read(positions[i + 1] - start)
    else:
        chunk = f.read()
    # Split into rows on real newlines, ignoring blank lines.
    rows = [line for line in chunk.splitlines() if line.strip()]
    if not rows:
        # Nothing to featurize (e.g. the final offset is an end-of-file
        # marker rather than the start of another object).
        continue

    # The id is the first comma-separated field of the object's first row.
    idnum = rows[0].split(',')[0]
    # One empty list per passband for times, measurements and errors.
    t = [[] for _ in pbnames]
    m = [[] for _ in pbnames]
    e = [[] for _ in pbnames]
    # Append each t/m/e value to the right passband list within t/m/e.
    for row in rows:
        fields = row.split(',')
        # Field 2 selects the passband list, so it must be an integer index.
        pb = int(fields[2])
        t[pb].append(float(fields[1]))
        m[pb].append(float(fields[3]))
        e[pb].append(float(fields[4]))

    # Create the time-series object, then generate features.
    tsobj = TimeSeries(t=t, m=m, e=e, name=idnum, channel_names=pbnames)
    # NOTE(review): per the cesium docs this returns a pandas Series of
    # feature values (not a plain dict) - confirm for the installed version.
    thisfeats = featurize.featurize_single_ts(
        tsobj, features_to_use=features_to_use, raise_exceptions=False
    )
    # File.write() only accepts text, so emit one CSV line per object:
    # the object id followed by its feature values.
    with open('featfile.csv', 'a') as ff:
        ff.write(
            ','.join([idnum] + [str(value) for value in thisfeats.values])
            + '\n')
    # The per-object names are rebound on every iteration, so no explicit
    # `del` is needed (and `del pb` would fail for an object with no rows).
    # NOTE(review): the statements below are indented as a loop body and use
    # `i`, `t`, `m`, `e`, `current_object_id`, `current_object_target`,
    # `tsdict`, `feature_list` and `train_meta_data`, none of which are
    # assigned inside this fragment - presumably they are set up by code that
    # is not shown here; confirm before relying on it.
    list_of_indices = np.arange(
        0, len(t))  # array of indices for plotting purposes

    tsdict['object_ID'].append(
        current_object_id)  # add id and target data to dictionary
    tsdict['target'].append(current_object_target)

    # Create a time series object for the source in question and store it for
    # transformation of test data.
    timeobj = TimeSeries(t=t,
                         m=m,
                         e=e,
                         label=current_object_target,
                         name=current_object_id)

    # Featurize the time series object from above.
    features_of_time_series = featurize.featurize_single_ts(
        timeobj, features_to_use=feature_list, raise_exceptions=False)

    #print(features_of_time_series.values)
    # `.values` is read as an attribute (not called) and turned into a list.
    tsdict['features'].append(
        list(features_of_time_series.values
             ))  # add the list of time series features to the dictionary
    # Extend entry `i` with columns 1 through 10 of row `i` of the metadata.
    tsdict['features'][i] += list(
        train_meta_data.iloc[i, 1:11])  # add the static data features

# Delete the dataframes holding all of the data now that it is all organized
# in a dictionary.
del (train_data)
del (train_meta_data)

#*******************************************************************************************************************
#Now the data is organized in the tsdict dictionary object and will be used
#for the construction of a logistic regression model with l1 (LASSO) regularization
Example #5
0
def worker(ts_obj, features_to_use):
    """Featurize one time-series object with the given feature selection.

    Passes ``ts_obj`` and ``features_to_use`` straight through to
    ``featurize.featurize_single_ts`` with ``raise_exceptions=False`` and
    returns its result unchanged.
    """
    return featurize.featurize_single_ts(
        ts_obj,
        features_to_use=features_to_use,
        raise_exceptions=False,
    )