def initialize(self, data_vec):
        """Create the initial clusters from the first window of ``data_vec``.

        The window starts at index 0 and spans ``self.settings.init_size``
        samples. If the clustering step raises ``np.linalg.LinAlgError`` the
        window is shortened by one sample and the step is retried.

        Side effects:
            Sets ``self.trial_clusters`` and ``self.pairs`` from the result of
            ``gsp.pair_clusters_appliance_wise``.

        Args:
            data_vec: Sliceable sequence of readings; consecutive elements
                are subtracted to obtain the power deltas.

        Returns:
            Tuple ``(initial_delta_power, initial_events, window_size)`` where
            ``window_size`` is the number of samples actually used, i.e.
            ``self.settings.init_size`` minus the samples dropped by retries.

        Raises:
            np.linalg.LinAlgError: If clustering still fails once the window
                cannot be shortened any further.
        """
        settings = self.settings
        extra_amount = 0
        while True:
            window_size = settings.init_size - extra_amount
            initial_data = data_vec[0:window_size]

            # Sample-to-sample change in power, rounded to two decimals.
            initial_delta_power = [
                np.round(nxt - cur, 2)
                for cur, nxt in zip(initial_data, initial_data[1:])
            ]
            # An event is any delta outside [T_Negative, T_Positive].
            initial_events = [
                idx
                for idx, delta in enumerate(initial_delta_power)
                if delta > settings.T_Positive or delta < settings.T_Negative
            ]

            try:
                trial_clusters = gsp.refined_clustering_block(
                    initial_events, initial_delta_power, settings.sigma, settings.ri
                )
                break
            except np.linalg.LinAlgError:
                extra_amount += 1
                if settings.init_size - extra_amount < 0:
                    # A negative slice stop would wrap around and select almost
                    # the whole series instead of a shorter window, so give up
                    # and surface the original error rather than loop on.
                    raise
                print("\tSVD didn't converge, retrying with {} less data".format(extra_amount))

        # Pair the clusters computed above. The previous code passed
        # self.trial_clusters here, which this method never assigns, and left
        # the freshly computed clusters unused.
        self.trial_clusters, self.pairs = gsp.pair_clusters_appliance_wise(
            trial_clusters, data_vec, initial_delta_power, settings.instancelimit
        )
        return (initial_delta_power, initial_events, window_size)
#%% 
# Split the input frame into its timestamps and its raw readings.
main_ind = df.index
main_val = df.values
data_vec = main_val

# Change in power between each pair of consecutive readings (2 decimals).
delta_p = [round(nxt - cur, 2) for cur, nxt in zip(data_vec, data_vec[1:])]

# Positions whose delta lies outside the [T_Negative, T_Positive] band.
event = [
    idx
    for idx, step in enumerate(delta_p)
    if step > T_Positive or step < T_Negative
]

# Initial and refined clustering block (Figure 1 in the paper).
clusters = gsp.refined_clustering_block(event, delta_p, sigma, ri)

# Feature matching block (Figure 1 in the paper).
finalclusters, pairs = gsp.pair_clusters_appliance_wise(
    clusters, data_vec, delta_p, instancelimit
)
appliance_pairs = gsp.feature_matching_module(
    pairs, delta_p, finalclusters, alpha, beta
)

# Build one disaggregated power series per appliance.
power_series, appliance_signatures = gsp.generate_appliance_powerseries(
    appliance_pairs, delta_p
)

# Label the disaggregated appliance clusters against the signature database.
# The signatures were extracted from a power analysis of April 28th to 30th.
signature_database = "signature_database_labelled.csv"
# Threshold of the DTW algorithm used for appliance power signature matching.
threshold = 2000
labeled_appliances = gsp.label_appliances(
    appliance_signatures, signature_database, threshold
)

# Attach the timestamps to the generated series.
power_timeseries = gsp.create_appliance_timeseries(power_series, main_ind)

# Combine every appliance series into a single pandas dataframe.
gsp_result = pd.concat(power_timeseries, axis=1)

labels = [label for _key, label in labeled_appliances.items()]