# Imports assumed by this snippet (not shown in the original): psycopg2 as pg,
# pandas.io.sql as psql, numpy as np, matplotlib.pyplot as plt, and
# burst_detection as bd.
import psycopg2 as pg
import pandas.io.sql as psql
import numpy as np
import matplotlib.pyplot as plt
import burst_detection as bd


def main():
        connection = pg.connect("dbname = rem user = wireless password = wireless")

        df = psql.read_sql("select occ, noise_floor, timetag from spectruminfo order by timetag DESC LIMIT 1000", connection)

        tempocc = df['occ'].values
        tempnf = df['noise_floor'].values


        occ = np.zeros((df.shape[0],16))
        nf = np.zeros((df.shape[0],16))


        # unpack each row's 16-channel arrays into the occupancy and noise-floor matrices
        for i in range(len(occ)):
                occ[i, :] = np.array(tempocc[i])
                nf[i, :] = np.array(tempnf[i])

        


        fitness = np.zeros((16,1))
        plt.subplot(411)
        for i in range(195,210):
                plt.plot(occ[i,:])
        
        plt.subplot(412)
        plt.plot(occ[:,6])
        #plt.plot(occ[:,6])
        plt.subplot(413)
        plt.plot(occ[:,13])
        
        plt.subplot(414)
        plt.plot(occ[:,12])

        # binarise occupancy per channel: values above the noise-floor-derived
        # threshold become 1, values below 0.9 become 0 (NumPy masking in place
        # of the removed scipy.stats.threshold calls)
        for i in range(16):
                thr = 10.0 / np.fabs(np.mean(nf[:, i]))
                print(thr)
                # print(np.mean(occ[:, i]))
                occ[occ[:, i] > thr, i] = 1
                occ[occ[:, i] < 0.9, i] = 0

        
        plt.subplot(413)
        plt.plot(occ[:,13])

        plt.subplot(412)
        plt.plot(occ[:,8])

        plt.subplot(414)
        plt.plot(occ[:,1])

        print(bd.enumerate_bursts(occ[:,8], 'burstLabel'))
        # print zero_runs(occ[:,8])
        #plt.hist(np.histogram(occ[:,0]), bins = [0, 1])
        

        plt.show()
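
For reference, a minimal sketch of what bd.enumerate_bursts does with a binarised column such as occ[:, 8] above: given a 0/1 state sequence it returns a DataFrame with one row per contiguous run of 1s, including 'begin' and 'end' index columns. The sequence below is synthetic, not data from the spectrum database.

import numpy as np
import burst_detection as bd

# synthetic 0/1 occupancy sequence standing in for a thresholded occ[:, i] column
q = np.array([0, 0, 1, 1, 1, 0, 0, 1, 1, 0], dtype=float)

# one row per contiguous run of 1s, with 'begin' and 'end' index columns
print(bd.enumerate_bursts(q, 'burstLabel'))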
Example #2
import numpy as np
import pandas as pd
import burst_detection as bd


def find_bursts(d, all_r, word_list):
    '''
    burst detection function
    '''
    s = 2   # resolution of state jumps; higher s --> fewer but stronger bursts
    gam = 0.5  # difficulty of moving up a state; larger gamma --> harder to move up states, less bursty
    n = len(d)  # number of timepoints
    smooth_win = 5

    all_bursts = pd.DataFrame(columns=['begin', 'end', 'weight'])

    for i, word in enumerate(word_list):
        r = all_r.loc[:, word].astype(int)

        # find the optimal state sequence (using the Viterbi algorithm);
        # bind the smoothed copies to new names so the original d is not
        # overwritten between words
        q, d_word, r_word, p = bd.burst_detection(r, d, n, s, gam, smooth_win)

        # enumerate the bursts
        bursts = bd.enumerate_bursts(q, word)

        # find weights of each burst
        bursts_weighted = bd.burst_weights(bursts, r_word, d_word, p)

        # add the weighted bursts to the list of all bursts
        all_bursts = pd.concat([all_bursts, bursts_weighted], ignore_index=True)

        # print a progress report every 100 words
        if np.mod(i, 100) == 0:
            print('total words', len(word_list), 'word', i, 'complete')

    return all_bursts.sort_values(by='weight', ascending=False)
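
A possible way to exercise find_bursts on synthetic data; the counts below are invented purely to illustrate the expected input shapes, and the call assumes pandas, numpy, and burst_detection are imported as in the function above.

import pandas as pd

# total documents per timepoint (d) and per-word target counts (all_r)
d = pd.Series([20.0, 22, 25, 24, 30, 40, 45, 28, 26, 25, 24, 23])
all_r = pd.DataFrame({
    'sensor':  [1, 2, 2, 3, 10, 18, 20, 4, 3, 2, 2, 1],
    'battery': [5, 5, 6, 5,  6,  7,  6, 5, 6, 5, 5, 5],
})

top_bursts = find_bursts(d, all_r, word_list=['sensor', 'battery'])
print(top_bursts.head())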
Example #3
    def detect_bursts_of_a_timeseries(self, timeseries_df, gamma=None):
        '''
        detect intervals with bursts of activity: [start_timestamp, end_timestamp)
        :param timeseries_df: timeseries dataframe for a single platform
        :param gamma: difficulty associated with moving up a state (input to the
            burst_detection library); if None, it is predicted from the timeseries
        the multiplicative distance between states (s) is fixed at 2
        burst_detection library: https://pypi.org/project/burst_detection/
        '''
        if len(timeseries_df) < 2:
            return None
        r = timeseries_df[self.id_col].values
        n = len(r)
        d = np.array([sum(r)] * n, dtype=float)
        if gamma is None:
            # skip low-activity series; otherwise predict a gamma for this timeseries
            if np.max(r) < 5:
                return None
            gamma = self.predict_gamma_for_timeseries(timeseries_df)
            with open('predicted_gammas.csv', 'a') as f:
                f.write(self.content_id + ',' + str(gamma) + '\n')

        q = bd.burst_detection(r, d, n, s=2, gamma=gamma, smooth_win=1)[0]
        # enumerate_bursts returns a df with 'begin' and 'end' columns for each
        # burst, where both the begin and end indices are inclusive
        bursts_df = bd.enumerate_bursts(q, 'burstLabel')
        index_date = pd.Series(
            timeseries_df[self.timestamp_col].values, index=timeseries_df.index).to_dict()
        time_granularity = index_date[1] - index_date[0]
        bursts_df['start_timestamp'] = bursts_df['begin'].map(index_date)
        bursts_df['end_timestamp'] = bursts_df['end'].map(index_date)
        bursts_df['end_timestamp'] = bursts_df['end_timestamp'] + time_granularity
        if len(bursts_df) > 0:
            return bursts_df
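
The index-to-timestamp mapping and the half-open interval convention used above can be shown in isolation; the daily timestamps and the single burst row below are invented values rather than output of this method.

import pandas as pd

# hypothetical daily timeseries index -> timestamp lookup, mirroring index_date above
index_date = pd.Series(pd.date_range('2021-01-01', periods=6, freq='D')).to_dict()

# a burst covering indices 2..4 (both inclusive), as enumerate_bursts reports it
bursts_df = pd.DataFrame({'begin': [2], 'end': [4]})

time_granularity = index_date[1] - index_date[0]
bursts_df['start_timestamp'] = bursts_df['begin'].map(index_date)
# shifting the inclusive end forward by one step makes the interval half-open
bursts_df['end_timestamp'] = bursts_df['end'].map(index_date) + time_granularity
print(bursts_df)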
Example #4
import datetime

import burst_detection as bd


def detect_bursts(company_date_abstract, topics_list):
	full_save_string = ""
	err_string = ""
	for company in company_date_abstract:
		co_bursts_str = ""
		co_list = company_date_abstract[company]
		for i,topic in enumerate(topics_list):
			bursts_string = ""
			r = []
			d = []
			for date_abs in co_list:
				abs_list = date_abs[1]
				d.append(len(abs_list))
				target_events = 0
				for ab in abs_list:
					for keyword in topic:
						if keyword in ab:
							target_events += 1
							break
				r.append(target_events)
			n = len(r)

			if all(elem == 0 for elem in r):
				continue
			# Suspected failure mode: with s = 2 and length-2 arrays such as
			# r = [1, 0] and d = [1, 1], p[0] = 1/2, so p reaches 1 (line 60 of
			# burst_detection), which causes an error on line 29 of its __init__.
			# Unconfirmed -- it could not be replicated in the console.
			try:
				q, d, r, p = bd.burst_detection(r, d, n, s=1.5, gamma=1.0, smooth_win=1)
			except ValueError:
				# record the offending inputs so they end up in bursts_errors.txt
				err_string += 'ValueError for company {} topic {}: r={} d={}\n'.format(company, i, r, d)
				continue
			except Exception as e:
				print('Error: ' + repr(e))
				continue
			bursts = bd.enumerate_bursts(q,'burstLabel')
			weighted_bursts = bd.burst_weights(bursts,r,d,p)
			if weighted_bursts.empty:
				continue

			kw_str = 'weighted bursts for topic no. ' + str(i) + ':' + '\n'
			bursts_string = kw_str + str(weighted_bursts) + '\n'
			beg_list = weighted_bursts['begin']
			end_list = weighted_bursts['end']
			for j in range(len(beg_list)):
				start_index = beg_list[j]
				end_index = end_list[j]
				start_date = datetime.date.fromordinal(int(co_list[start_index][0]))
				end_date = datetime.date.fromordinal(int(co_list[end_index][0]))
				date_str = '{} Start: {} End: {}\n\n'.format(j, start_date, end_date)
				bursts_string = bursts_string + date_str
			co_bursts_str = co_bursts_str + bursts_string
		if co_bursts_str != "":
			co_bursts_str = company[0].upper() + '\n' + co_bursts_str
			full_save_string += co_bursts_str
	with open('bursts_by_topic.txt','w') as f:
		f.write(full_save_string)
	with open('bursts_errors.txt','w') as f:
		f.write(err_string)
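
A sketch of the input layout detect_bursts expects, inferred from how the function indexes its arguments; the company name, dates, and abstracts below are invented for illustration.

import datetime

# company -> list of (ordinal date, list of abstract strings), one entry per day
company_date_abstract = {
    'acme': [
        (datetime.date(2020, 1, 1).toordinal(), ['lidar sensor recall notice']),
        (datetime.date(2020, 1, 2).toordinal(), ['new battery plant announced',
                                                 'battery supplier agreement']),
        (datetime.date(2020, 1, 3).toordinal(), ['quarterly earnings call']),
    ],
}

# each topic is a list of keywords; an abstract counts once per topic if any keyword matches
topics_list = [['sensor', 'lidar'], ['battery']]

detect_bursts(company_date_abstract, topics_list)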
def get_bursts(topics_list,date_abs):
	bursts_string = ""
	for topic in topics_list:
		print('Topic')
		for keyword in topic:
			print(keyword)
			r = []
			d = []
			for date_abs_tuple in date_abs:
				orddate = date_abs_tuple[0]
				abs_list = date_abs_tuple[1]
				target_events = 0
				d.append(len(abs_list))
				for abstract in abs_list:
					if keyword in abstract:
						target_events += 1
				r.append(target_events)
			n = len(r)
			if all(elem == 0 for elem in r):
				continue
			print('calculating the bursts')
			try:
				q,d,r,p = bd.burst_detection(r,d,n,s=2.2,gamma=1.0,smooth_win=1)
			except Exception as e:
				print('Error: ' + repr(e))
				continue
			bursts = bd.enumerate_bursts(q,'burstLabel')
			weighted_bursts = bd.burst_weights(bursts,r,d,p)
			if weighted_bursts.empty:
				continue
			kw_str = 'weighted bursts for ' + keyword + ':' + '\n'
			bursts_string = bursts_string + kw_str + str(weighted_bursts) + '\n'
			beg_list = weighted_bursts['begin']
			end_list = weighted_bursts['end']
			for i in range(len(beg_list)):
				start_index = beg_list[i]
				end_index = end_list[i]
				start_date = datetime.date.fromordinal(int(date_abs[start_index][0]))
				end_date = datetime.date.fromordinal(int(date_abs[end_index][0]))
				date_str = '{} Start: {} End: {}\n\n'.format(i,start_date,end_date)
				bursts_string = bursts_string + date_str
	with open('bursts_no_company.txt','w') as f:
		f.write(bursts_string)
Example #6
import bisect

import numpy as np
import burst_detection as bd


def get_total_events(tx, token_address):
    global time_list
    global first_timestamp
    events = []
    i = 0
    timestamp = 0
    day_event = 0
    first_timestamp = 0
    # count transfer events into this token per 86400-second day,
    # up to Unix time 1546214400 (2018-12-31 00:00:00 UTC)
    for row in tx.run(
            "match (n:NODE)-[t:TOKEN_TRANSFER]->(m:NODE{address:$token_address}) "
            "where t.time <= 1546214400 "
            "return t.time as time order by t.time",
            token_address=token_address):
        if i == 0:
            index = bisect.bisect_left(time_list, row["time"])
            timestamp = time_list[index]
            first_timestamp = timestamp
            i += 1
        if row["time"] <= timestamp:
            day_event += 1
        else:
            events.append(day_event)
            day_event = 0
            timestamp += 86400
            while (row["time"] > timestamp):
                events.append(0)
                timestamp += 86400
            day_event += 1
    events.append(day_event)
    # use a constant baseline (max daily count + 10) as the total-events series d
    maximum_day_event = max(events) + 10
    total_events = [maximum_day_event] * len(events)

    r = np.array(events, dtype=float)
    d = np.array(total_events, dtype=float)
    n = len(r)
    q, d, r, p = bd.burst_detection(r, d, n, s=1.75, gamma=1, smooth_win=3)
    # append this token's burst probabilities to the CSV
    with open("burst_probability.csv", "a+") as file1:
        file1.write("{0} {1} {2}\n".format(token_address, p[0], p[1]))
    bursts = bd.enumerate_bursts(q, 'burstLabel')
    return bursts
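
get_total_events is written as a Neo4j transaction function, so it is typically passed to a session rather than called directly. A minimal sketch with the official neo4j Python driver, using placeholder connection details and token address, and assuming the module-level time_list of day boundaries has already been populated.

from neo4j import GraphDatabase

driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
token_address = "0x0000000000000000000000000000000000000000"  # placeholder

with driver.session() as session:
    # the driver calls get_total_events(tx, token_address) with a live transaction
    bursts = session.read_transaction(get_total_events, token_address)
    print(bursts)

driver.close()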
Example #7
    def detect_bursts(self, s=2, gamma=0.5):
        '''
        detect intervals with bursts of activity: [begin_timestamp, end_timestamp)
        :param s: multiplicative distance between states (input to burst_detection library)
        :param gamma: difficulty associated with moving up a state (input to burst_detection library)
        burst_detection library: https://pypi.org/project/burst_detection/
        '''
        r = self.counts_df[self.id_col].values
        n = len(r)
        d = np.array([sum(r)] * n, dtype=float)
        q = bd.burst_detection(r, d, n, s, gamma, 1)[0]
        bursts_df = bd.enumerate_bursts(q, 'burstLabel')
        index_date = pd.Series(self.counts_df[self.timestamp_col].values,
                               index=self.counts_df.index).to_dict()
        bursts_df['begin_timestamp'] = bursts_df['begin'].map(index_date)
        bursts_df['end_timestamp'] = bursts_df['end'].map(index_date)
        time_granularity = index_date[1] - index_date[0]
        self.burst_intervals = [(burst['begin_timestamp'],
                                 burst['end_timestamp'] + time_granularity)
                                for _, burst in bursts_df.iterrows()]
        self.update_with_burst()
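
update_with_burst is not shown here; a minimal sketch of how the half-open burst_intervals built above might be consumed, with an invented counts dataframe and interval (column names and values are assumptions, not part of the original class).

import pandas as pd

counts_df = pd.DataFrame({
    'timestamp': pd.date_range('2021-06-01', periods=6, freq='D'),
    'count': [2, 3, 9, 11, 4, 2],
})
burst_intervals = [(pd.Timestamp('2021-06-03'), pd.Timestamp('2021-06-05'))]

# flag rows whose timestamp falls inside any half-open [begin, end) interval
counts_df['in_burst'] = counts_df['timestamp'].apply(
    lambda t: any(begin <= t < end for begin, end in burst_intervals))
print(counts_df)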