コード例 #1
0
def create_correlation_table(plot_variable1, plot_variable2, values1, values2, seconds):
    """Store two equally long series of values in a new HDF5 correlation table.

    Only the first entry of each ``plot_variable`` list is read.  Index 0 of
    that entry is the variable name and index 2 the station identifier; index
    1 of ``plot_variable1[0]`` is the data filename from which the start and
    end date in the output filename are derived.

    ``values1[i]`` and ``values2[i]`` are written as columns ``variable1`` and
    ``variable2`` of row ``i`` in the table ``/correlation/table``.

    Returns the name of the file that was written.  When ``values1`` and
    ``values2`` differ in length nothing is written, a message is printed and
    None is returned.
    """
    if len(values2) != len(values1):
        print('Your variable lists are not of the same length. Interpolation is necessary.')
        return None

    print('')
    print('Creating new table...')
    print('')

    # Only these variables are asked for a plate count; every other variable
    # keeps a count of 0 and therefore gets a scalar column below.
    per_plate_variables = ('pulseheights', 'integrals')
    number_of_plates1 = (get_number_of_plates(values1[0])
                         if plot_variable1[0][0] in per_plate_variables else 0)
    number_of_plates2 = (get_number_of_plates(values2[0])
                         if plot_variable2[0][0] in per_plate_variables else 0)

    def column_for(plate_count):
        # A plate count of 1 to 4 yields an array column of that length,
        # anything else a scalar column.
        if plate_count in (1, 2, 3, 4):
            return Float64Col(shape=(plate_count,))
        return Float64Col()

    # Row description of the table that is about to be created.
    class Variable1(IsDescription):
        variable1 = column_for(number_of_plates1)
        variable2 = column_for(number_of_plates2)

    # Derive the start and end date from the name of the first data file.
    stripped_name = plot_variable1[0][1].replace('data_s%s_' % plot_variable1[0][2], '')
    start_date, _, remainder = stripped_name.partition('_')
    end_date = remainder[1:].replace('.h5', '')

    name_template = ('interpolated_table_%s_station%s_with_%s_station%s_%s_%s'
                     '_timeinterval_%d.h5')
    filename = name_template % (plot_variable1[0][0], plot_variable1[0][2],
                                plot_variable2[0][0], plot_variable2[0][2],
                                start_date, end_date, seconds)

    # Write one row per pair of values.
    with openFile(filename, 'w') as data_cor:
        correlation_group = data_cor.createGroup("/", 'correlation')
        correlation_table = data_cor.createTable(correlation_group, 'table', Variable1)

        record = correlation_table.row
        for first_value, second_value in zip(values1, values2):
            record['variable1'] = first_value
            record['variable2'] = second_value
            record.append()

        correlation_table.flush()

    print('Done.')
    return filename
コード例 #2
0
ファイル: plot_data.py プロジェクト: HiSPARC/correlation
def _read_columns(entry):
    """Return the (timestamp, variable) columns for one plot_variable entry.

    ``entry`` is indexed as (variable name, data filename, station, group).
    The node ``/s<station>/<group>`` is located by attribute access instead
    of building a string for ``eval``.
    """
    with tables.openFile(entry[1], 'r') as data:
        node = data.root
        # The group may be given as a dotted path, exactly as it could be in
        # the attribute expression this replaces.
        for name in ('s%s.%s' % (entry[2], entry[3])).split('.'):
            node = getattr(node, name)
        return node.col('timestamp'), node.col(entry[0])


def _ask_time_limits(first_timestamp, last_timestamp):
    """Ask for a lower and an upper limit, in seconds after the first timestamp.

    Keeps asking until the lower limit lies inside the data set and the upper
    limit is larger than the lower one and also inside the data set.
    Returns ``(x_lim_low, x_lim_up)`` as integers.
    """
    print('First you are going to enter the LOWER time limit.')
    while True:
        x_lim_low = int(question.digit('Enter the number of seconds after the start time shown above ( e.g. input "3600" means x_begin = timestamp + 3600 s ): '))
        if first_timestamp + x_lim_low <= last_timestamp:
            break
        print("Oops! Your lower time limit lies beyond your data set. Try again.")

    print('')
    print('Now you are going to enter the UPPER time limit.')
    while True:
        x_lim_up = int(question.digit('Enter the number of seconds after the start time shown above ( e.g. input "86400" means x_end = timestamp + 86400 s ): '))
        if x_lim_up <= x_lim_low:
            print("Oops! The upper time limit less than or equal to the lower time limit. Try again.")
        elif first_timestamp + x_lim_up > last_timestamp:
            print("Oops! Your upper time limit lies beyond your data set. Try again.")
        else:
            break

    return x_lim_low, x_lim_up


def _select_interval(dat_sorted, x_lim_low, x_lim_up):
    """Return the rows of ``dat_sorted`` strictly between the two limits.

    ``dat_sorted`` is a 2-D array of (timestamp, value) rows; the limits are
    offsets in seconds from its first timestamp.  The result is always 2-D,
    so an empty selection can still be sliced per column.
    """
    timestamps = dat_sorted[:, 0]
    lower = dat_sorted[0][0] + x_lim_low
    upper = dat_sorted[0][0] + x_lim_up
    return dat_sorted[(timestamps > lower) & (timestamps < upper)]


def _plot_mpv(plot_variable):
    """Plot the most probable value per time interval and save it as a PNG."""
    variable_name = plot_variable[0][0]

    print('')
    interval = question.digit("Select the time interval (in seconds) over which the MPV values must be calculated ( e.g. for a day enter '86400' ): ")
    seconds = int(interval)
    # e.g. variable_parts = [[p1, p2...pn], [p1, p2...pn], ....]
    # time = [t1, t2...tn], timestamps in the middle of every time interval
    variable_parts, time, number_of_plates = split_data_file_in_parts(plot_variable, seconds)

    if variable_name == 'pulseheights':
        MPV_list, number_of_plates, times = find_MPV_pulseheights(variable_parts, plot_variable, time, number_of_plates)

        plot_variable1 = [('pulseheights', plot_variable[0][1], plot_variable[0][2], 'events')]
        plot_variable2 = [('time', plot_variable[0][1], plot_variable[0][2], 'events')]

        # NOTE(review): the table is written with the ``times`` returned
        # together with MPV_list (the pair that is plotted below) rather than
        # the unfiltered ``time`` -- confirm against find_MPV_pulseheights.
        create_correlation_table(plot_variable1, plot_variable2, MPV_list, times, seconds)
    else:
        # 'integrals': one MPV entry per time interval, so the interval
        # timestamps are the matching x values.
        MPV_list, number_of_plates = find_MPV_integrals(variable_parts, plot_variable)
        times = time

    times_dates = [datetime.fromtimestamp(x) for x in times]
    values = array(MPV_list)

    for i in range(number_of_plates):
        plt.plot(times_dates, values[:, i])
    plt.xlabel('time')
    plt.ylabel('%s (%s)' % (variable_name, units[variable_name]))

    # Build the image filename from the first and last data filenames.
    first_entry = plot_variable[0]
    last_entry = plot_variable[-1]
    first_name = first_entry[1].replace('data_s%s_' % str(first_entry[2]), '')
    start_date = first_name.partition('_')[0]
    last_name = last_entry[1].replace('data_s%s_' % str(last_entry[2]), '')
    end_date = last_name.partition('_')[2][1:].replace('.h5', '')

    fname = ('MPV_%s_s%s_%s-%s_timeinterval_%d_seconds.png' %
             (variable_name, first_entry[2], start_date, end_date, seconds))
    plt.savefig(fname)
    plt.show()

    return values, times, 'MPV'


def _plot_per_plate(plot_variable):
    """Plot a variable that is stored once per plate (2 or 4 values per event)."""
    variable_name = plot_variable[0][0]
    time_list = []
    plates = [[], [], [], []]
    number_of_plates = 0

    for entry in plot_variable:
        timestamps, variable = _read_columns(entry)
        time_list.extend(timestamps)

        number_of_plates = get_number_of_plates(variable[0])
        for index in range(4 if number_of_plates == 4 else 2):
            plates[index].extend(list(variable[:, index]))

    # As before, the plate count of the last file decides how many plates
    # are plotted.
    plate_count = 4 if number_of_plates == 4 else 2
    sorted_plates = [array(sorted(zip(time_list, plate)))
                     for plate in plates[:plate_count]]
    first_plate = sorted_plates[0]
    first_timestamp = first_plate[0][0]
    last_timestamp = first_plate[-1][0]

    print('')
    print('Your file(s) contain(s) data from ' + str(datetime.fromtimestamp(first_timestamp)) + ' until ' + str(datetime.fromtimestamp(last_timestamp)))
    print('')
    whole = query_yes_no('Do you want to PLOT this whole time interval')

    if whole:
        times = first_plate[:, 0]
        x = [datetime.fromtimestamp(i) for i in times]

        for dat_sorted in sorted_plates:
            plt.plot(x, dat_sorted[:, 1])
        # One row per event, one column per plate.
        values = array([dat_sorted[:, 1] for dat_sorted in sorted_plates]).T

        plt.ylabel('%s (%s)' % (variable_name, units[variable_name]))
        plt.grid(True)
        plt.show()
        return values, times, 'whole'

    while True:
        print('')
        print('Start time: ' + str(datetime.fromtimestamp(first_timestamp)))
        print('Seconds in interval: ' + str(last_timestamp - first_timestamp))
        print('')
        x_lim_low, x_lim_up = _ask_time_limits(first_timestamp, last_timestamp)

        selections = [_select_interval(dat_sorted, x_lim_low, x_lim_up)
                      for dat_sorted in sorted_plates]
        values = array([selection[:, 1] for selection in selections]).T
        times = selections[0][:, 0]
        x = [datetime.fromtimestamp(i) for i in times]

        for selection in selections:
            plt.plot(x, selection[:, 1])
        plt.ylabel('%s (%s)' % (variable_name, units[variable_name]))
        plt.show()

        again = query_yes_no('Do you want to plot again with different values for the UPPER and LOWER time?')
        if not again:
            break

    return values, times, 'part'


def _plot_single_valued(plot_variable):
    """Plot a variable that has a single value per event."""
    variable_name = plot_variable[0][0]
    variable_list = []
    time_list = []

    for entry in plot_variable:
        timestamps, variable = _read_columns(entry)
        time_list.extend(timestamps)
        variable_list.extend(variable)

    # e.g. dat_sorted = (timestamp, variable)
    dat_sorted = array(sorted(zip(time_list, variable_list)))
    first_timestamp = dat_sorted[0][0]
    last_timestamp = dat_sorted[-1][0]

    print('')
    print('Your file(s) contain(s) data from %s until %s' % (str(datetime.fromtimestamp(first_timestamp)), str(datetime.fromtimestamp(last_timestamp))))
    print('')
    whole = query_yes_no('Do you want to PLOT this whole time interval?')

    if whole:
        times = dat_sorted[:, 0]
        values = dat_sorted[:, 1]
        x = [datetime.fromtimestamp(i) for i in times]

        plt.plot(x, values)
        plt.ylabel('%s (%s)' % (variable_name, units[variable_name]))
        plt.grid(True)
        plt.show()
        return values, times, 'whole'

    while True:
        print('')
        print('Start time = ' + str(datetime.fromtimestamp(first_timestamp)))
        print('Seconds in interval: ' + str(last_timestamp - first_timestamp))
        print('')
        x_lim_low, x_lim_up = _ask_time_limits(first_timestamp, last_timestamp)

        plot_list = _select_interval(dat_sorted, x_lim_low, x_lim_up)
        times = plot_list[:, 0]
        values = plot_list[:, 1]
        x = [datetime.fromtimestamp(i) for i in times]

        plt.plot(x, values)
        plt.ylabel('%s (%s)' % (variable_name, units[variable_name]))
        plt.grid(True)
        plt.show()

        again = query_yes_no('Do you want to plot again with a different time limits?')
        if not again:
            break

    return values, times, 'part'


def plot_data(plot_variable):
    """Interactively plot one variable against time.

    ``plot_variable`` is a list with one entry per data file; each entry is
    indexed as (variable name, data filename, station, group).  The variable
    name of the first entry decides what is plotted:

    * 'pulseheights' and 'integrals' can be plotted as the most probable
      value (MPV) per time interval, if the user asks for that;
    * otherwise those two, and 'baseline', 'std_dev' and 'n_peaks', are
      plotted per plate (2 or 4 values per event instead of just 1);
    * every other variable is plotted as a single value per event.

    For the last two cases the user chooses between the whole time range and
    a part of it.

    Returns ``(values, times, returntype)`` where ``returntype`` is 'MPV',
    'whole' or 'part'.
    """
    variable_name = plot_variable[0][0]

    if variable_name not in ('pulseheights', 'integrals',
                             'baseline', 'std_dev', "n_peaks"):
        return _plot_single_valued(plot_variable)

    MPV = False
    if variable_name in ('pulseheights', 'integrals'):
        MPV = query_yes_no('Do you want to PLOT the MPV (Most probable value) of the %s?' % variable_name)

    if MPV:
        return _plot_mpv(plot_variable)
    return _plot_per_plate(plot_variable)
コード例 #3
0
def find_MPV_integrals(pulseintegral_list, plot_variable):
    """Estimate the most probable pulse integral per plate per time interval.

    ``pulseintegral_list`` holds one 2-D array per time interval, indexed as
    ``[event, plate]``.  For every plate a 750-bin histogram is made, the
    part up to the first rise after the highest bin is cut away, and the
    centre of the highest remaining bin is taken as a first estimate.  That
    estimate is then refined by fitting ``func`` to histograms of the data
    within several windows around it; the accepted fit whose centre lies
    closest to the estimate wins.  If no fit is accepted the first estimate
    is used and a warning is printed.

    ``plot_variable[0][0]`` is only used to look up the unit for printing.

    Returns ``(MPV_list, number_of_plates)``: one list of per-plate values
    for every time interval, and the plate count of the last interval (0 if
    there were no intervals).
    """
    print('')
    show_plot = query_yes_no('Do you want to see a plot of every individual fit for every plate?')
    MPV_list = []
    number_of_plates = 0  # stays 0 when pulseintegral_list is empty

    # enumerate() instead of list.index(): comparing numpy arrays with ==
    # inside index() raises ValueError as soon as there are several intervals.
    for interval_number, pi in enumerate(pulseintegral_list, 1):

        print('Time interval %d of %d.' % (interval_number, len(pulseintegral_list)))
        print('')

        MPV_MIPs = []
        number_of_plates = get_number_of_plates(pi[0])

        for i in range(number_of_plates):

            print('')
            print('plate %d ' % (i + 1))

            p_plate_total = array(pi[:, i])

            # Histogram counts and bin edges; the figure itself is not needed.
            binsize, value = plt.hist(p_plate_total, 750)[:2]
            plt.clf()

            # First bin holding the overall maximum count.
            photon_bin = list(binsize).index(max(binsize))

            # First bin after that maximum whose successor is higher.  Falls
            # back to the bin right after the maximum when the counts never
            # rise again.
            lowest_bin = min(photon_bin + 1, len(binsize) - 1)
            for h in range(photon_bin + 1, len(binsize) - 1):
                if binsize[h + 1] > binsize[h]:
                    lowest_bin = h
                    break

            binsize = binsize[lowest_bin:]
            value = value[lowest_bin:]
            MPV_bin = max(binsize)

            # Centre of the last bin holding the maximum count, taken from
            # that bin's own edges so that bin 0 does not wrap around.
            for j in range(len(binsize)):
                if binsize[j] == MPV_bin:
                    MPV_value = (value[j + 1] - value[j]) / 2 + value[j]

            # Accepted fits as [deviation, a, b, c, k].
            fits = []

            for k in (1500, 1250, 1000, 750, 500):

                # Data within k of the first estimate.
                in_window = (p_plate_total > MPV_value - k) & (p_plate_total < MPV_value + k)
                values, bins = plt.hist(p_plate_total[in_window], 500)[:2]
                bin_centers = (bins[:-1] + bins[1:]) / 2
                plt.clf()

                try:
                    popt, pcov = curve_fit(func, bin_centers, values)
                except RuntimeError:
                    # Raised by curve_fit when the fit does not converge.
                    print('Error')
                    continue

                a, b, c = popt[0], popt[1], popt[2]
                deviation = abs(MPV_value - b)

                # NOTE(review): presumably a < 0 selects a fit that opens
                # downwards -- confirm against the definition of func.
                if a < 0 and deviation < 500:
                    fits.append([deviation, a, b, c, k])

            if fits:
                deviation, a, b, c, k = min(fits)
                MPV_MIPs.append(b)

                print('')
                print("Deviation  = %g" % deviation)
                print("a = %g" % a)
                print("b = %g" % b)
                print("c = %g" % c)
                print("k = %g" % k)
                print('')

                if show_plot:
                    plt.hist(p_plate_total, 750)
                    plt.yscale('log')
                    plt.axvline(b)
                    print('Close plot to continue...')
                    print('')
                    plt.show()

                print('MPV value = %.2f %s' % (b, units[plot_variable[0][0]]))
                print('')

            else:
                MPV_MIPs.append(MPV_value)
                print('')
                print('WARNING: Bad fit')
                print('')

                print('')
                print('MPV value = %.2f %s' % (MPV_value, units[plot_variable[0][0]]))
                print('')

                if show_plot:
                    plt.hist(p_plate_total, 750)
                    plt.yscale('log')
                    plt.axvline(MPV_value)
                    print('')
                    print('Close plot to continue...')
                    plt.show()

        MPV_list.append(MPV_MIPs)

    return MPV_list, number_of_plates