def worker(tsobj):
    """Compute the configured feature set for a single time-series object.

    Parameters
    ----------
    tsobj
        Time-series object handed straight to
        ``featurize.featurize_single_ts``.

    Returns
    -------
    Whatever ``featurize.featurize_single_ts`` returns for ``tsobj``.

    Notes
    -----
    The feature names come from the module-level ``features_to_use`` and the
    custom feature functions from ``feature_sets.custom_features``.
    ``raise_exceptions=False`` is forwarded unchanged to cesium.
    """
    # `features_to_use` is only read here, so it is resolved from the module
    # globals at call time without needing a `global` declaration.
    return featurize.featurize_single_ts(
        tsobj,
        features_to_use=features_to_use,
        custom_functions=feature_sets.custom_features,
        raise_exceptions=False,
    )
def avg_double_to_single_step(df):
    """Compute cesium's ``avg_double_to_single_step`` feature for one object.

    Feature reference: http://cesium-ml.org/docs/feature_table.html

    Parameters
    ----------
    df
        Table with the columns ``passband``, ``mjd``, ``flux`` and
        ``flux_err``; rows are grouped by ``passband``.

    Returns
    -------
    pd.Series
        Built from the ``to_dict()`` of the feature's entry in the
        featurization result (presumably one value per passband group;
        confirm against the cesium output layout).
    """
    feature_name = 'avg_double_to_single_step'
    by_band = df.groupby('passband')

    def per_band(column):
        # One array of raw values per passband group for the given column.
        return by_band[column].apply(lambda col: col.values)

    ts = time_series.TimeSeries(
        t=per_band('mjd'),
        m=per_band('flux'),
        e=per_band('flux_err'),
    )
    computed = featurize.featurize_single_ts(ts, features_to_use=[feature_name])
    return pd.Series(computed[feature_name].to_dict())
# Offsets into the already-open input file `f` at which each object's rows
# begin.
# NOTE(review): the trailing `...` is a placeholder for the remaining offsets;
# it must be replaced with the real integers before this script can run.
positions = [1, 12076, 24634, ...]

# store pb names for labelling the features accurately
pbnames = ['u', 'g', 'r', 'i', 'z', 'Y']

# Open the output once rather than re-opening it for every object.
with open('featfile.csv', 'a') as ff:
    # Pair each start offset with the following one. The last offset has no
    # successor, so its chunk is read through to end-of-file instead of
    # indexing past the end of `positions`.
    for start, stop in zip(positions, positions[1:] + [None]):
        # seek to the object's starting position
        f.seek(start)
        # read up to the start of the subsequent object
        # NOTE(review): assumes `f` is opened in text mode and the data is
        # ASCII, so offsets and character counts coincide - confirm.
        s = f.read() if stop is None else f.read(stop - start)

        # split by row, then by field; blank lines are dropped
        rows = [line.split(',') for line in s.splitlines() if line.strip()]
        if not rows:
            # Nothing between the two offsets (e.g. the final offset is EOF).
            continue

        # assign id, create one empty list per passband for t/m/e
        # Column layout used below: 0 = id, 1 = time, 2 = passband index,
        # 3 = measurement, 4 = measurement error.
        idnum = rows[0][0]
        t = [[] for _ in pbnames]
        m = [[] for _ in pbnames]
        e = [[] for _ in pbnames]

        # Append each t/m/e value to the right passband list
        for row in rows:
            pb = int(row[2])  # fields are text; list indices must be ints
            t[pb].append(float(row[1]))
            m[pb].append(float(row[3]))
            e[pb].append(float(row[4]))

        # create ts obj, then generate features
        tsobj = TimeSeries(t=t, m=m, e=e, name=idnum, channel_names=pbnames)
        thisfeats = featurize.featurize_single_ts(
            tsobj, features_to_use=features_to_use, raise_exceptions=False
        )

        # Accept either a plain dict or a pandas-style object exposing
        # `.values` - TODO confirm which one the installed cesium returns.
        if isinstance(thisfeats, dict):
            values = thisfeats.values()
        else:
            values = thisfeats.values

        # One CSV row per object: the id followed by its feature values.
        ff.write(','.join([idnum] + [str(v) for v in values]) + '\n')

        # Drop the per-object data before the next chunk is read.
        del s, rows, idnum, pb, t, m, e, tsobj, thisfeats, values
list_of_indices = np.arange( 0, len(t)) #array of indicies for plotting purposes tsdict['object_ID'].append( current_object_id) #add id and target data to dictionary tsdict['target'].append(current_object_target) #create time series object for the source in question and store for transformation of test data timeobj = TimeSeries(t=t, m=m, e=e, label=current_object_target, name=current_object_id) #featurize the time series object from above features_of_time_series = featurize.featurize_single_ts( timeobj, features_to_use=feature_list, raise_exceptions=False) #print(features_of_time_series.values) tsdict['features'].append( list(features_of_time_series.values )) #add the list of time series features to the dictionary tsdict['features'][i] += list( train_meta_data.iloc[i, 1:11]) #add the static data features #Going to delete the dataframes with all of the data now that it is all organized in a dictionary del (train_data) del (train_meta_data) #******************************************************************************************************************* #Now the data is organized in the tsdict dictionary object and will be used #for the construction of a logistic regression model with l1 (LASSO) regularization
def worker(ts_obj, features_to_use):
    """Compute the requested features for a single time-series object.

    Parameters
    ----------
    ts_obj
        Time-series object handed straight to
        ``featurize.featurize_single_ts``.
    features_to_use
        Feature names to compute, forwarded unchanged.

    Returns
    -------
    Whatever ``featurize.featurize_single_ts`` returns for ``ts_obj``.
    ``raise_exceptions=False`` is forwarded unchanged to cesium.
    """
    return featurize.featurize_single_ts(
        ts_obj,
        features_to_use=features_to_use,
        raise_exceptions=False,
    )