def initialize(self, data_vec):
        """Create the initial clusters and appliance pairs from the head of ``data_vec``.

        The first ``self.settings.init_size`` samples are differenced, the
        samples whose change is above ``self.settings.T_Positive`` or below
        ``self.settings.T_Negative`` are taken as events, and the events are
        clustered with ``gsp.refined_clustering_block``.  When clustering
        raises ``np.linalg.LinAlgError`` the window is shortened by one
        sample and the attempt is repeated.

        The resulting clusters are then paired with
        ``gsp.pair_clusters_appliance_wise`` and the outcome is stored on
        ``self.trial_clusters`` and ``self.pairs``.

        Args:
            data_vec: Sliceable sequence of aggregate readings.

        Returns:
            A tuple ``(initial_delta_power, initial_events, window_size)``
            where ``window_size`` is the number of samples actually used
            (``init_size`` minus the samples trimmed by retries).

        Raises:
            np.linalg.LinAlgError: If clustering still fails once every
                sample of the initial window has been trimmed away.
        """
        start = 0
        extra_amount = 0  # samples trimmed from the window by failed attempts
        while True:
            window_size = self.settings.init_size - extra_amount
            initial_data = data_vec[start:start + window_size]
            initial_delta_power = [
                np.round(initial_data[i + 1] - initial_data[i], 2)
                for i in range(len(initial_data) - 1)
            ]
            initial_events = [
                i for i, delta in enumerate(initial_delta_power)
                if (delta > self.settings.T_Positive
                    or delta < self.settings.T_Negative)
            ]

            try:
                trial_clusters = gsp.refined_clustering_block(
                    initial_events, initial_delta_power,
                    self.settings.sigma, self.settings.ri)
            except np.linalg.LinAlgError:
                extra_amount += 1
                if extra_amount >= self.settings.init_size:
                    # Nothing left to trim: a zero or negative slice stop
                    # would no longer shrink the window (a negative stop
                    # counts from the end), so give up instead of looping.
                    raise
                print("\tSVD didn't converge, retrying with {} less data".format(extra_amount))
            else:
                break

        # Pair the clusters that were just computed; previously the local
        # result was dropped and ``self.trial_clusters`` was passed instead.
        self.trial_clusters, self.pairs = gsp.pair_clusters_appliance_wise(
            trial_clusters, data_vec, initial_delta_power,
            self.settings.instancelimit)
        return (initial_delta_power, initial_events, window_size)
beta = 0.5
# Minimum number of times an appliance has to be switched ON within the
# considered time span.
instancelimit = 3

#%%
main_val = df.values  # readings only
main_ind = df.index  # timestamps only
data_vec = main_val
# Labelled signature database; the signatures were extracted from a power
# analysis covering April 28th to 30th.
signature_database = "signature_database_labelled.csv"
# Threshold of the DTW algorithm used for appliance power-signature matching.
threshold = 2000

# Change between consecutive readings, rounded to two decimals.
delta_p = [
    round(data_vec[i + 1] - data_vec[i], 2)
    for i in range(len(data_vec) - 1)
]
# Positions whose change is above T_Positive or below T_Negative.
event = [
    idx for idx, change in enumerate(delta_p)
    if (change > T_Positive or change < T_Negative)
]

# Initial and refined clustering block of Figure 1 in the paper.
clusters = gsp.refined_clustering_block(event, delta_p, sigma, ri)

# Feature matching block of Figure 1 in the paper.
finalclusters, pairs = gsp.pair_clusters_appliance_wise(
    clusters, data_vec, delta_p, instancelimit)
appliance_pairs = gsp.feature_matching_module(
    pairs, delta_p, finalclusters, alpha, beta)

# Build the appliance-wise disaggregated series.
power_series, appliance_signatures = gsp.generate_appliance_powerseries(
    appliance_pairs, delta_p)

# Label the disaggregated appliance clusters by comparing them with the
# signature database.
labeled_appliances = gsp.label_appliances(
    appliance_signatures, signature_database, threshold)

# Attach timestamps to the generated series.
power_timeseries = gsp.create_appliance_timeseries(power_series, main_ind)

# create pandas dataframe of all series
# Change between consecutive readings, cast to integers.
delta_p = [
    (data_vec[i + 1] - data_vec[i]).astype(int)
    for i in range(len(data_vec) - 1)
]
# Positions whose change is above settings.T_Positive or below
# settings.T_Negative.
event = [
    idx for idx, change in enumerate(delta_p)
    if (change > settings.T_Positive or change < settings.T_Negative)
]
# Dump the event positions to a text file, one integer per line.
np.savetxt(
    'd_events.txt',
    np.array(event, dtype=np.int32).astype(int),
    fmt='%i')
print('Events is {}'.format(len(event)))

# Initial and refined clustering block of Figure 1 in the paper.
print('Clusters with {} {} {} {}'.format(
    len(event), len(delta_p), settings.sigma, settings.ri))
clusters = gsp.refined_clustering_block(
    event, delta_p, settings.sigma, settings.ri)
# Sanity output: total cluster membership next to the number of events.
print('Expected {} got {}'.format(len(event), sum(map(len, clusters))))
print('Got {} clusters'.format(len(clusters)))

# Feature matching block of Figure 1 in the paper.
finalclusters, pairs = gsp.pair_clusters_appliance_wise(
    clusters, data_vec, delta_p, settings.instancelimit)
# NOTE(review): this formats the `pairs` object itself rather than a count;
# confirm whether len(pairs) was intended.
print('Found {} pairs'.format(pairs))
print('Got {} clusters and {} final clusters'.format(
    len(clusters), len(finalclusters)))
appliance_pairs = gsp.feature_matching_module(
    pairs, delta_p, finalclusters, settings.alpha, settings.beta)

# Build the appliance-wise disaggregated series.
power_series, appliance_signatures = gsp.generate_appliance_powerseries(
    appliance_pairs, delta_p)