Example #1
0
 def getAlignmentMatrix(self, alignment_id):
     """
     Collect every sequence of one alignment into a PassingData bundle.

     Sequence rows whose ``alignment`` equals ``alignment_id`` are fetched
     ordered by ``Sequence.accession``.  The position list is obtained once,
     from the alignment object attached to the first row, through
     ``self.get_snp_pos_ls(target, chromosome, start)``.

     Returns a PassingData carrying:
         snp_pos_ls      -- result of get_snp_pos_ls (empty list if no rows)
         accession_id_ls -- ``row.accession`` of each row, in query order
         name_ls         -- ``row.accession_obj.name`` of each row
         data_matrix     -- num.int8 array with one entry per row, built
                            from ``dict_map(nt2number, row.bases)``

     Progress messages are written to stderr.
     """
     sys.stderr.write("Getting alignment matrix for alignment=%s ..." %
                      (alignment_id))
     query = Sequence.query.filter_by(alignment=alignment_id)
     rows = query.order_by(Sequence.accession).all()

     snp_pos_ls = []
     accession_id_ls = []
     name_ls = []
     encoded_rows = []
     for row_index, row in enumerate(rows):
         if row_index == 0:
             # All rows share the alignment, so the first one is enough
             # to derive the column positions.
             alignment = row.alignment_obj
             snp_pos_ls = self.get_snp_pos_ls(alignment.target,
                                              alignment.chromosome,
                                              alignment.start)
         accession_id_ls.append(row.accession)
         name_ls.append(row.accession_obj.name)
         encoded_rows.append(dict_map(nt2number, row.bases))

     passingdata = PassingData(snp_pos_ls=snp_pos_ls,
                               accession_id_ls=accession_id_ls,
                               name_ls=name_ls,
                               data_matrix=num.array(encoded_rows, num.int8))
     sys.stderr.write(' %s accessions, %s bases. Done.\n' %
                      (len(accession_id_ls), len(snp_pos_ls)))
     return passingdata
Example #2
0
	def getAlignmentMatrix(self, alignment_id):
		"""
		Collect every sequence of one alignment into a PassingData bundle.

		Sequence rows whose ``alignment`` equals ``alignment_id`` are fetched
		ordered by ``Sequence.accession``.  The position list is derived once,
		from the target sequence of the alignment attached to the first row.
		Each entry is a tuple (chromosome, position, offset):
			- a target base that nt2number does not map to -1 advances the
			  position by one and resets the offset to 0;
			- a target base mapped to -1 (deletion in the target) keeps the
			  previous position and increments the offset.

		Returns a PassingData carrying snp_pos_ls, accession_id_ls
		(``row.accession``), name_ls (``row.accession_obj.name``) and
		data_matrix, a num.int8 array built from
		``dict_map(nt2number, row.bases)`` of every row.

		Progress messages are written to stderr.
		"""
		sys.stderr.write("Getting alignment matrix for alignment=%s ..."%(alignment_id))
		rows = Sequence.query.filter_by(alignment=alignment_id).order_by(Sequence.accession).all()

		snp_pos_ls = []
		accession_id_ls = []
		name_ls = []
		encoded_rows = []
		for row_index, row in enumerate(rows):
			if row_index == 0:
				alignment = row.alignment_obj
				chromosome = alignment.chromosome
				for column, base in enumerate(alignment.target):
					is_deletion = nt2number[base] == -1
					if column == 0:
						if is_deletion:
							# This probably doesn't exist in db. It is debatable whether
							# this insertion belongs to the previous base or to the
							# alignment's start base.
							entry = (chromosome, alignment.start-1, 1)
						else:
							# The 3rd element is the insertion offset relative to the
							# column position.
							entry = (chromosome, alignment.start, 0)
					else:
						# Exactly one entry is appended per column, so the last
						# entry is the one for the previous column.
						previous = snp_pos_ls[-1]
						if is_deletion:
							# Position doesn't change; only the offset grows.
							entry = (chromosome, previous[1], previous[2]+1)
						else:
							entry = (chromosome, previous[1]+1, 0)
					snp_pos_ls.append(entry)
			accession_id_ls.append(row.accession)
			name_ls.append(row.accession_obj.name)
			encoded_rows.append(dict_map(nt2number, row.bases))

		passingdata = PassingData(snp_pos_ls=snp_pos_ls, accession_id_ls=accession_id_ls, name_ls=name_ls, data_matrix=num.array(encoded_rows, num.int8))
		sys.stderr.write(' %s accessions, %s bases. Done.\n'%(len(accession_id_ls), len(snp_pos_ls)))
		return passingdata
Example #3
0
    def read_data(cls, input_fname, input_alphabet=0, turn_into_integer=1, double_header=0, delimiter="\t"):
        """
        Read a delimited data-matrix file into plain Python lists.

        DEPRECATED (2008-05-18): moved to pymodule.SNP.

        The first line (or the first two lines when ``double_header`` is
        true) is taken as the header.  In every following row, column 0 is
        stored as the strain accession, column 1 as the category, and the
        remaining columns form one row of the data matrix.

        Arguments:
            input_fname       -- path of the file to read
            input_alphabet    -- if true, each data row is passed through
                                 ``dict_map(nt2number, data_row)``; rows whose
                                 length changes in that step are printed to
                                 stdout
            turn_into_integer -- if true (and ``input_alphabet`` is false),
                                 every data cell is converted with ``int``
            double_header     -- if true, the header is a list of the first
                                 two rows instead of a single row
            delimiter         -- column separator handed to ``csv.reader``
                                 (default: tab)

        Returns:
            (header, strain_acc_list, category_list, data_matrix)

        Raises:
            StopIteration -- if the file has fewer lines than the header needs
            ValueError    -- if ``turn_into_integer`` is set and a cell is not
                             an integer literal
            IndexError    -- if a data row has fewer than two columns

        Change log:
            2008-05-12  add delimiter
            2008-05-07  add option double_header
            2007-10-09  add turn_into_integer
            2007-05-14  add input_alphabet
            2007-03-06  differs from the SelectStrains.py version by
                        converting each data row to integers
        """
        sys.stderr.write("Reading data ...")
        data_matrix = []
        strain_acc_list = []
        category_list = []
        # The with-block closes the file deterministically, including when a
        # row fails to parse; the previous code relied on garbage collection.
        with open(input_fname) as input_file:
            reader = csv.reader(input_file, delimiter=delimiter)
            # next(reader) works on Python 2.6+ and 3, unlike reader.next().
            header = next(reader)
            if double_header:
                header = [header, next(reader)]
            for row in reader:
                strain_acc_list.append(row[0])
                category_list.append(row[1])
                data_row = row[2:]
                if input_alphabet:
                    no_of_snps = len(data_row)
                    data_row = dict_map(nt2number, data_row)
                    if no_of_snps != len(data_row):
                        # Report rows whose length changed during the mapping
                        # (same stdout output as the former print statement).
                        sys.stdout.write("%s\n" % (row,))
                elif turn_into_integer:
                    # A list comprehension yields a real list on every Python
                    # version (map() is lazy on Python 3).
                    data_row = [int(cell) for cell in data_row]
                data_matrix.append(data_row)
        sys.stderr.write("Done.\n")
        return header, strain_acc_list, category_list, data_matrix