def run( self, args ):
    """Attach the reader and writer named in ``args``, then run the base runner.

    ``args.cncnt_file_name`` is handed to ``ConanCountsReader`` and
    ``args.cnsm_file_name`` to ``ConanSnvMixWriter``; the resulting objects
    are stored on ``self.reader`` and ``self.writer`` before control is
    passed to ``ModelRunner.run``.
    """
    # The reader is attached before the writer is constructed, so a failure
    # while building the writer still leaves ``self.reader`` set.
    counts_reader = ConanCountsReader( args.cncnt_file_name )
    self.reader = counts_reader

    results_writer = ConanSnvMixWriter( args.cnsm_file_name )
    self.writer = results_writer

    ModelRunner.run( self, args )
class ConanModelRunner( ModelRunner ):
    """Runner that trains and classifies one model per ``cn_state``.

    The reader supplies the set of ``cn_state`` values and, for each of
    them, a list of chromosomes with their counts and rows.  Trained
    parameters and the priors used to obtain them are kept per ``cn_state``
    in ``self.parameters`` and ``self.priors``.

    ``self.model_class``, ``self._get_priors`` and ``self._write_priors``
    are not defined here; they are expected to come from ``ModelRunner`` or
    a subclass.
    """

    def __init__( self ):
        ModelRunner.__init__( self )

        self.data_class = JointData

        # Both dictionaries are keyed by cn_state and filled in by
        # _train_cn_state.
        self.parameters = {}
        self.priors = {}

    def run( self, args ):
        """Open the reader/writer named in ``args`` and delegate to ``ModelRunner.run``."""
        self.reader = ConanCountsReader( args.cncnt_file_name )
        self.writer = ConanSnvMixWriter( args.cnsm_file_name )

        ModelRunner.run( self, args )

    def _classify( self, args ):
        """Classify every ``cn_state`` reported by the reader, in sorted order."""
        cn_states = self.reader.get_cn_states()

        for cn_state in sorted( cn_states ):
            self._classify_cn_state( cn_state )

    def _classify_cn_state( self, cn_state ):
        """Classify every chromosome of ``cn_state``, in sorted order."""
        chr_list = self.reader.get_chr_list( cn_state )

        for chr_name in sorted( chr_list ):
            self._classify_chromosome( cn_state, chr_name )

    def _train( self, args ):
        """Train a model for every ``cn_state``, then write out the priors."""
        cn_states = self.reader.get_cn_states()

        for cn_state in sorted( cn_states ):
            self._train_cn_state( cn_state, args )

        self._write_priors()

    def _train_cn_state( self, cn_state, args ):
        """Train the model for one ``cn_state`` and record its parameters and priors.

        When ``args.subsample_size`` is positive the model is trained on a
        subsample of roughly that many rows; otherwise all rows of
        ``cn_state`` are used.
        """
        if args.subsample_size > 0:
            counts = self._subsample( cn_state, args.subsample_size )
        else:
            # The previous code called self.reader.get_counts() with no
            # arguments, unlike every other call in this class, which passes
            # ( cn_state, chr_name ).  Training is per cn_state, so gather
            # exactly that state's counts.
            # NOTE(review): assumes get_counts requires both arguments -
            # confirm against ConanCountsReader.
            counts = self._cn_state_counts( cn_state )

        nclass = self._cn_state_nclass( cn_state )

        priors = self._get_priors( nclass )
        model = self.model_class( nclass )
        data = self.data_class( counts )

        self.parameters[cn_state] = model.train( data,
                                                 priors,
                                                 args.max_iters,
                                                 args.convergence_threshold )
        self.priors[cn_state] = priors

    def _classify_chromosome( self, cn_state, chr_name ):
        """Classify one chromosome of ``cn_state`` in fixed-size chunks.

        Each chunk of counts is classified with the parameters stored for
        ``cn_state`` and written, together with the matching rows, through
        ``self.writer``.
        """
        nclass = self._cn_state_nclass( cn_state )
        model = self.model_class( nclass )

        counts = self.reader.get_counts( cn_state, chr_name )
        jcnt_rows = self.reader.get_rows( cn_state, chr_name )

        end = self.reader.get_chr_size( cn_state, chr_name )

        # Rows handled per classify/write call.
        chunk_size = int( 1e5 )

        for start in range( 0, end, chunk_size ):
            stop = min( start + chunk_size, end )

            sub_counts = counts[start:stop]
            sub_rows = jcnt_rows[start:stop]

            data = self.data_class( sub_counts )
            resp = model.classify( data, self.parameters[cn_state] )

            self.writer.write_data( cn_state, chr_name, sub_rows, resp )

    def _subsample( self, cn_state, sample_size ):
        """Return a random subsample of the counts of ``cn_state``.

        Each chromosome contributes a share of ``sample_size`` proportional
        to its fraction of the rows of ``cn_state`` (rounded down), drawn
        without replacement.  The per-chromosome samples are stacked
        row-wise with ``np.vstack``.
        """
        chr_list = self.reader.get_chr_list( cn_state )
        nrows = self.reader.get_data_set_size( cn_state )

        sample = []

        for chr_name in chr_list:
            chr_size = self.reader.get_chr_size( cn_state, chr_name )

            # Proportional allocation, floored, and never more rows than the
            # chromosome actually has.
            chr_share = float( chr_size ) / nrows * sample_size
            chr_sample_size = min( chr_size, int( math.floor( chr_share ) ) )

            # xrange keeps this lazy on Python 2, which this file targets.
            chr_sample_indices = random.sample( xrange( chr_size ), chr_sample_size )

            chr_counts = self.reader.get_counts( cn_state, chr_name )
            sample.append( chr_counts[chr_sample_indices] )

        return np.vstack( sample )

    def _cn_state_nclass( self, cn_state ):
        """Return the per-genome class counts used to build a model for ``cn_state``.

        'normal' is always 3; 'tumour' is looked up in
        ``constants.cn_state_map``.
        """
        return {
            'normal': 3,
            'tumour': constants.cn_state_map[cn_state],
        }

    def _cn_state_counts( self, cn_state ):
        """Return the counts of every chromosome of ``cn_state`` stacked row-wise.

        Chromosomes are visited in the order given by the reader, as in
        ``_subsample``.
        """
        chr_list = self.reader.get_chr_list( cn_state )

        # Slicing with [:] mirrors the slice access used when classifying.
        all_counts = [ self.reader.get_counts( cn_state, chr_name )[:]
                       for chr_name in chr_list ]

        return np.vstack( all_counts )