Beispiel #1
0
    def analysis_pick_results(pick_results_file, reference_coordinate_dir,
                              reference_coordinate_symbol, particle_size,
                              minimum_distance_rate):
        """Compare pickled picking results with the reference coordinates.

        The picked coordinates are loaded from ``pick_results_file`` and, for
        every micrograph that has a reference ``.star`` file, matched against
        the reference with ``Picker.calculate_tp``. Precision and recall at a
        fixed prediction-score threshold are printed, and the cumulative
        statistics over all picks (sorted by descending score) are written to
        ``results.txt`` in the directory of ``pick_results_file``.

        Args:
            pick_results_file: string, the file name of the pickled picking
                results.
            reference_coordinate_dir: string, the directory that holds the
                reference coordinate (``.star``) files.
            reference_coordinate_symbol: string, the suffix of the reference
                coordinate files, like '_manualpick'.
            particle_size: int, the size of the particle.
            minimum_distance_rate: float (e.g. 0.2). A picked coordinate is
                considered a true positive only when its distance to a
                reference coordinate is less than
                ``minimum_distance_rate * particle_size``; that product is
                what gets passed to ``Picker.calculate_tp``.

        Returns:
            None. Results are printed and written to ``results.txt``.
            Ratios whose denominator is zero are reported as 0.0.
        """
        # NOTE(review): pickle.load can execute arbitrary code; only feed it
        # result files that come from a trusted source.
        with open(pick_results_file, 'rb') as f:
            coordinate = pickle.load(f)
        # coordinate: one list per picked micrograph. Each of those lists
        # holds one entry per particle: [x, y, prediction score, micrograph
        # name]. The loops below also read index 4 (== 1 marks a true
        # positive) and index 5 (a distance); presumably both are filled in
        # place by Picker.calculate_tp -- TODO confirm.

        # Prediction-score cut-off used for the precision/recall printed
        # before the cumulative statistics.
        score_threshold = 0.99

        tp = 0.
        total_pick = 0
        total_reference = 0
        coordinate_total = []
        print(len(coordinate))
        total_analyse_num = 0
        for picked in coordinate:
            if len(picked) == 0:
                continue
            total_analyse_num += 1
            mrc_filename = os.path.basename(picked[0][3])
            reference_coordinate_file = os.path.join(
                reference_coordinate_dir,
                mrc_filename.replace('.mrc',
                                     reference_coordinate_symbol + '.star'))
            if not os.path.isfile(reference_coordinate_file):
                print("Can not find the reference coordinate:" +
                      reference_coordinate_file)
                continue

            # reference_coordinate: one [x, y] entry per reference particle.
            reference_coordinate = DataLoader.read_coordinate_from_star(
                reference_coordinate_file)
            matched, average_distance = Picker.calculate_tp(
                picked, reference_coordinate,
                particle_size * minimum_distance_rate)
            # %-formatting keeps the output identical on Python 2 and 3.
            print("tp: %s" % matched)
            print("average_distance: %s" % average_distance)

            num_reference = len(reference_coordinate)
            total_reference += num_reference
            # Count the true positives above the score threshold.
            tp_single = 0.
            for particle in picked:
                coordinate_total.append(particle)
                if particle[2] > score_threshold:
                    total_pick += 1
                    if particle[4] == 1:
                        tp += 1
                        tp_single += 1
            # An empty reference file must not abort the whole analysis.
            print(tp_single / num_reference if num_reference else 0.0)

        print("tp= %s" % tp)
        print("total_pick= %s" % total_pick)
        print("total_analyse_num= %s" % total_analyse_num)
        precision = tp / total_pick if total_pick else 0.0
        recall = tp / total_reference if total_reference else 0.0
        # Report the score threshold that was actually applied above.
        print("(threshold %.2f)precision:%f recall:%f" %
              (score_threshold, precision, recall))

        # Sort the coordinates by prediction score in descending order and
        # accumulate the statistics for every prefix of that ranking.
        coordinate_total = sorted(coordinate_total,
                                  key=itemgetter(2),
                                  reverse=True)
        total_tp = []
        total_recall = []
        total_precision = []
        total_probability = []
        total_average_distance = []
        total_distance = 0.
        tp_tem = 0.
        for rank, particle in enumerate(coordinate_total, 1):
            if particle[4] == 1:
                tp_tem += 1
                total_distance += particle[5]
            total_tp.append(tp_tem)
            total_recall.append(
                tp_tem / total_reference if total_reference else 0.0)
            total_precision.append(tp_tem / rank)
            total_probability.append(particle[2])
            if tp_tem == 0:
                total_average_distance.append(0)
            else:
                total_average_distance.append(total_distance / tp_tem)

        # Write the lists to results.txt; the context manager closes the file
        # even when a later step raises.
        num_picked = len(coordinate_total)
        directory_pick = os.path.dirname(pick_results_file)
        total_results_file = os.path.join(directory_pick, 'results.txt')
        with open(total_results_file, 'w') as results:
            for row in (total_tp, total_recall, total_precision,
                        total_probability, total_average_distance):
                results.write(','.join(map(str, row)) + '\n')
            results.write('#total autopick number:%d\n' % num_picked)
            results.write('#total manual pick number:%d\n' % total_reference)
            results.write('#the first row is number of true positive\n')
            results.write('#the second row is recall\n')
            results.write('#the third row is precision\n')
            results.write('#the fourth row is probability\n')
            results.write('#the fiveth row is distance\n')

            # Show precision/recall after taking the best 1x, 2x, ... times
            # the number of reference particles. Skipped when there is
            # nothing to index or the step would be zero.
            if total_reference and coordinate_total:
                times_of_manual = num_picked // total_reference + 1
                for ratio in range(1, times_of_manual + 1):
                    print('autopick_total sort, take the head number of '
                          'total_manualpick * ratio %d' % ratio)
                    results.write(
                        '#autopick_total sort, take the head number of '
                        'total_manualpick * ratio %d \n' % ratio)
                    # The last step is clamped to the final pick.
                    last = min(ratio * total_reference, num_picked) - 1
                    summary = 'precision:%f \trecall:%f' % (
                        total_precision[last], total_recall[last])
                    print(summary)
                    results.write(summary + ' \n')
    def analysis_pick_results(pick_results_file, reference_coordinate_dir, reference_coordinate_symbol, particle_size, minimum_distance_rate):
        """Load the pickled picking results and compare them with the reference coordinates.

        For every micrograph that has a reference ``.star`` file the picks are
        matched with ``AutoPicker.calculate_tp``. Precision and recall at a
        prediction-score threshold of 0.5 are printed, and the cumulative
        statistics over all picks (sorted by descending score) are written to
        ``results.txt`` in the directory of ``pick_results_file``.

        Args:
            pick_results_file: string, the file name of the pickled picking results.
            reference_coordinate_dir: string, the directory that holds the reference coordinate (``.star``) files.
            reference_coordinate_symbol: string, the suffix of the reference coordinate files, like '_manualpick'.
            particle_size: int, the size of the particle.
            minimum_distance_rate: float (e.g. 0.2). A picked coordinate is considered a true positive only
                when its distance to a reference coordinate is less than minimum_distance_rate * particle_size;
                that product is what gets passed to ``AutoPicker.calculate_tp``.

        Returns:
            None. Results are printed and written to ``results.txt``. Ratios
            whose denominator is zero are reported as 0.0.
        """
        # NOTE(review): pickle.load can execute arbitrary code; only feed it
        # result files that come from a trusted source.
        with open(pick_results_file, 'rb') as f:
            coordinate = pickle.load(f)
        # coordinate: one list per picked micrograph. Each of those lists
        # holds one entry per particle: [x, y, prediction score, micrograph
        # name]. The loops below also read index 4 (== 1 marks a true
        # positive) and index 5 (a distance); presumably both are filled in
        # place by AutoPicker.calculate_tp -- TODO confirm.

        score_threshold = 0.5
        tp = 0
        total_pick = 0
        total_reference = 0
        coordinate_total = []
        for picked in coordinate:
            # A micrograph without any pick has no entry to read the file
            # name from, so there is nothing to analyse for it.
            if len(picked) == 0:
                continue
            mrc_filename = os.path.basename(picked[0][3])
            reference_coordinate_file = os.path.join(
                reference_coordinate_dir,
                mrc_filename.replace('.mrc', reference_coordinate_symbol+'.star'))
            if not os.path.isfile(reference_coordinate_file):
                print("Can not find the reference coordinate:"+reference_coordinate_file)
                continue

            # reference_coordinate: one [x, y] entry per reference particle.
            reference_coordinate = DataLoader.read_coordinate_from_star(reference_coordinate_file)
            # The returned (tp, average distance) pair is not used here; the
            # call is kept because the flags read below are presumably set
            # by it.
            AutoPicker.calculate_tp(picked, reference_coordinate, particle_size*minimum_distance_rate)

            num_reference = len(reference_coordinate)
            total_reference += num_reference
            # Count the true positives above the score threshold.
            tp_single = 0
            for particle in picked:
                coordinate_total.append(particle)
                if particle[2] > score_threshold:
                    total_pick += 1
                    if particle[4] == 1:
                        tp += 1
                        tp_single += 1
            # float() forces true division even under Python 2; an empty
            # reference file must not abort the whole analysis.
            print(float(tp_single)/num_reference if num_reference else 0.0)

        precision = float(tp)/total_pick if total_pick else 0.0
        recall = float(tp)/total_reference if total_reference else 0.0
        print("(threshold 0.5)precision:%f recall:%f"%(precision, recall))

        # Sort the coordinates by prediction score in descending order and
        # accumulate the statistics for every prefix of that ranking.
        coordinate_total = sorted(coordinate_total, key=itemgetter(2), reverse=True)
        total_tp = []
        total_recall = []
        total_precision = []
        total_probability = []
        total_average_distance = []
        total_distance = 0
        tp_tem = 0
        for rank, particle in enumerate(coordinate_total, 1):
            if particle[4] == 1:
                tp_tem += 1
                total_distance += particle[5]
            total_tp.append(tp_tem)
            total_recall.append(float(tp_tem)/total_reference if total_reference else 0.0)
            total_precision.append(float(tp_tem)/rank)
            total_probability.append(particle[2])
            if tp_tem == 0:
                total_average_distance.append(0)
            else:
                total_average_distance.append(float(total_distance)/tp_tem)

        # Write the lists to results.txt; the context manager closes the file
        # even when a later step raises.
        num_picked = len(coordinate_total)
        directory_pick = os.path.dirname(pick_results_file)
        total_results_file = os.path.join(directory_pick, 'results.txt')
        with open(total_results_file, 'w') as results:
            for row in (total_tp, total_recall, total_precision, total_probability, total_average_distance):
                results.write(','.join(map(str, row))+'\n')
            results.write('#total autopick number:%d\n'%num_picked)
            results.write('#total manual pick number:%d\n'%total_reference)
            results.write('#the first row is number of true positive\n')
            results.write('#the second row is recall\n')
            results.write('#the third row is precision\n')
            results.write('#the fourth row is probability\n')
            results.write('#the fiveth row is distance\n')

            # Show precision/recall after taking the best 1x, 2x, ... times
            # the number of reference particles. Skipped when there is
            # nothing to index or the step would be zero.
            if total_reference and coordinate_total:
                times_of_manual = num_picked//total_reference + 1
                for ratio in range(1, times_of_manual+1):
                    print('autopick_total sort, take the head number of total_manualpick * ratio %d'%ratio)
                    results.write('#autopick_total sort, take the head number of total_manualpick * ratio %d \n'%ratio)
                    # The last step is clamped to the final pick.
                    last = min(ratio*total_reference, num_picked) - 1
                    summary = 'precision:%f \trecall:%f'%(total_precision[last], total_recall[last])
                    print(summary)
                    results.write(summary+' \n')
Beispiel #3
0
    def pick(self, mrc_filename):
        """Pick particle candidates from one micrograph with a sliding window.

        The micrograph is read and preprocessed, cut into square patches on a
        regular grid, scored with ``self.deepModel`` and the peaks that
        ``self.peak_detection`` finds in the score map are converted back to
        coordinates scaled by the bin size.

        Args:
            mrc_filename: string, path of the micrograph. Names ending in
                '.rec' are read with ``DataLoader.readRecFile``, everything
                else with ``DataLoader.readMrcFile``.

        Returns:
            An empty list when the reader returns ``None`` for the header or
            the body. Otherwise a tuple ``(list_coordinate,
            list_coordinate_all)``; both are lists of
            ``[coord0, coord1, score, mrc_filename]`` sorted by descending
            score. The first keeps the entries with score >
            ``self.threshold``, the second those with score > 0.0.
        """
        if mrc_filename.endswith('.rec'):
            header, body = DataLoader.readRecFile(mrc_filename)
        else:
            header, body = DataLoader.readMrcFile(mrc_filename)
        # Identity test: ``== None`` would compare element-wise if the reader
        # hands back a numpy array.
        if header is None or body is None:
            # NOTE(review): the success path returns a 2-tuple; the bare list
            # is kept so existing callers keep working.
            return []
        num_col = header[0]
        num_row = header[1]
        body_2d = np.array(body, dtype=np.float32).reshape(num_row, num_col)
        body_2d, bin_size = DataLoader.preprocess_micrograph(body_2d)

        step_size = 4
        patch_size = int(self.particle_size / bin_size)
        local_window_size = int(patch_size / step_size)

        time1 = time.time()
        # Upper-left corners of the patches along both axes of the
        # preprocessed micrograph.
        starts_axis0 = range(0, body_2d.shape[0] - patch_size, step_size)
        starts_axis1 = range(0, body_2d.shape[1] - patch_size, step_size)
        particle_candidate_all = []
        for start0 in starts_axis0:
            for start1 in starts_axis1:
                particle_candidate_all.append(
                    np.copy(body_2d[start0:(start0 + patch_size),
                                    start1:(start1 + patch_size)]))
        num_total_patch = len(particle_candidate_all)
        # The score map has exactly one cell per patch, so its shape is
        # taken from the two ranges instead of being derived arithmetically
        # (which was off by one when the axes had different remainders
        # modulo step_size).
        map_size0 = len(starts_axis0)
        map_size1 = len(starts_axis1)

        particle_candidate_all = np.array(particle_candidate_all).reshape(
            num_total_patch, patch_size, patch_size, 1)
        predictions = self.deepModel.evaluation(particle_candidate_all,
                                                self.sess)
        # Keep column 1 of the model output as the per-patch score.
        predictions = predictions[:, 1:2]
        predictions = predictions.reshape(map_size0, map_size1)
        time_cost = time.time() - time1
        if self.verbose:
            print("gpu time: %.1f s" % time_cost)

        list_coordinate = self.peak_detection(predictions, local_window_size)
        for entry in list_coordinate:
            entry.append(mrc_filename)
            # Map index -> patch centre, then scale by the bin size.
            entry[0] = (entry[0] * step_size + patch_size / 2) * bin_size
            entry[1] = (entry[1] * step_size + patch_size / 2) * bin_size

        # All coordinates with a positive score.
        list_coordinate_all = sorted(
            [entry for entry in list_coordinate if entry[2] > 0.0],
            key=lambda x: x[2],
            reverse=True)
        # Coordinates that pass the picking threshold.
        list_coordinate = sorted(
            [entry for entry in list_coordinate if entry[2] > self.threshold],
            key=lambda x: x[2],
            reverse=True)
        print("#candidate:%d, #picked:%d" %
              (num_total_patch, len(list_coordinate)))

        if self.plot_picking_result:
            # Plot on the preprocessed micrograph, so undo the bin-size
            # scaling on a copy and leave the returned coordinates untouched.
            plot_list_coordinate = copy.deepcopy(list_coordinate)
            for entry in plot_list_coordinate:
                entry[0] = entry[0] / bin_size
                entry[1] = entry[1] / bin_size
            plot_dir = os.path.join(os.path.abspath(self.output_dir), "plot")
            if self.verbose:
                print("plot_dir >>>>>>>>>>  %s" % plot_dir)
            if not os.path.exists(plot_dir):
                os.makedirs(plot_dir)
            display.plot_circle_in_micrograph(
                body_2d, plot_list_coordinate, patch_size,
                os.path.join(
                    plot_dir,
                    "micro_circle_%s.png" % (os.path.basename(mrc_filename))))
        return list_coordinate, list_coordinate_all