コード例 #1
0
  def readConf(self):
    '''Read a configuration file (configobj format) and return a list of Clusters.

    The top-level ``clusters`` option names the cluster sections; each cluster
    section has a ``name`` and a ``servers`` list; each server section has
    ``ip``, ``port``, ``secure``, ``modealt`` and a ``vhosts`` list; each vhost
    section has ``name`` and ``burl``.

    Returns:
      A list of Cluster objects, each with its Server objects appended to
      ``cluster.servers``.

    Raises:
      SyntaxError: if ``clusters``, a cluster's ``servers`` or a server's
        ``vhosts`` is not a list (a single value written without a trailing
        comma).
    '''
    result = []
    config = ConfigObj(self.filename)

    clusters = self._getConfigValue(config, 'clusters')
    if not isinstance(clusters, list):
      raise SyntaxError('Configuration error [%s] - clusters is not a list. Add a coma to create one' % self.filename)

    # Reuse the values validated above instead of looking them up a second time.
    for c in clusters:
      cluster = Cluster()
      cluster.name = self._getConfigValue(config, c, 'name')

      servers = self._getConfigValue(config, c, 'servers')
      if not isinstance(servers, list):
        raise SyntaxError('Configuration error [%s] - servers is not a list. Add a coma to create one' % self.filename)

      for s in servers:
        srv = Server()
        srv.ip = self._getConfigValue(config, s, 'ip')
        srv.port = self._getConfigValue(config, s, 'port')
        srv.secure = self._getConfigValue(config, s, 'secure')
        srv.modealt = self._getConfigValue(config, s, 'modealt')

        vhosts = self._getConfigValue(config, s, 'vhosts')
        if not isinstance(vhosts, list):
          raise SyntaxError('Configuration error [%s] - [%s].vhosts is not a list. Add a coma to create one' % (self.filename, s))

        ## If no vhost defined, switch to a default one
        if not vhosts:
          srv.add_vhost('')
        for vh in vhosts:
          vhost_name = self._getConfigValue(config, vh, 'name')
          vhost_burl = self._getConfigValue(config, vh, 'burl')
          srv.add_vhost(vhost_name, vhost_burl)

        cluster.servers.append(srv)

      ## Appending cluster object to returned result
      result.append(cluster)
    return result
コード例 #2
0
    def readConf(self):
        '''Read a configuration file (configobj format) and return a list of Clusters.

        The top-level ``clusters`` option names the cluster sections; each
        cluster section has a ``name`` and a ``servers`` list; each server
        section has ``ip``, ``port``, ``secure``, ``modealt`` and a ``vhosts``
        list; each vhost section has ``name`` and ``burl``.

        Returns:
            A list of Cluster objects, each with its Server objects appended
            to ``cluster.servers``.

        Raises:
            SyntaxError: if ``clusters``, a cluster's ``servers`` or a server's
                ``vhosts`` is not a list (a single value written without a
                trailing comma).
        '''
        result = []
        config = ConfigObj(self.filename)

        clusters = self._getConfigValue(config, 'clusters')
        if not isinstance(clusters, list):
            raise SyntaxError(
                'Configuration error [%s] - clusters is not a list. Add a coma to create one'
                % self.filename)

        for c in clusters:
            cluster = Cluster()
            cluster.name = self._getConfigValue(config, c, 'name')

            # Validate ``servers`` like the other list options: iterating a
            # plain string would walk it character by character.
            servers = self._getConfigValue(config, c, 'servers')
            if not isinstance(servers, list):
                raise SyntaxError(
                    'Configuration error [%s] - servers is not a list. Add a coma to create one'
                    % self.filename)

            for s in servers:
                srv = Server()
                srv.ip = self._getConfigValue(config, s, 'ip')
                srv.port = self._getConfigValue(config, s, 'port')
                srv.secure = self._getConfigValue(config, s, 'secure')
                srv.modealt = self._getConfigValue(config, s, 'modealt')

                vhosts = self._getConfigValue(config, s, 'vhosts')
                if not isinstance(vhosts, list):
                    raise SyntaxError(
                        'Configuration error [%s] - [%s].vhosts is not a list. Add a coma to create one'
                        % (self.filename, s))

                ## If no vhost defined, switch to a default one
                if not vhosts:
                    srv.add_vhost('')
                for vh in vhosts:
                    vhost_name = self._getConfigValue(config, vh, 'name')
                    vhost_burl = self._getConfigValue(config, vh, 'burl')
                    srv.add_vhost(vhost_name, vhost_burl)

                cluster.servers.append(srv)

            ## Appending cluster object to returned result
            result.append(cluster)
        return result
コード例 #3
0
ファイル: utils.py プロジェクト: 23Skidoo/genomizer-server
class DualSortedReader:
    """Given two sorted files of tags in a format supported by Pyicoteo, iterates through them returning them in order.

    Iterating the reader yields the raw text lines of both files, interleaved
    according to the ordering of the Cluster objects parsed from them.
    """
    def __init__(self, file_a_path, file_b_path, format, read_half_open=False, logger=None):
        """Open both input files and create one Cluster per file to hold its current line.

        Args:
            file_a_path: path of the first sorted file.
            file_b_path: path of the second sorted file.
            format: format name handed to ``Cluster(read=...)``.
            read_half_open: forwarded to ``Cluster``.
            logger: optional logger forwarded to ``Cluster`` and ``SafeReader``.
        """
        self.logger = logger
        self.file_a = open(file_a_path)
        try:
            self.file_b = open(file_b_path)
        except Exception:
            # Do not leak the first handle when the second file cannot be opened.
            self.file_a.close()
            raise
        self.current_a = Cluster(cached=False, read=format, read_half_open=read_half_open, logger=self.logger)
        self.current_b = Cluster(cached=False, read=format, read_half_open=read_half_open, logger=self.logger)
        
    def __iter__(self):
        """Yield the lines of both files in merged order.

        While both files have data, the smaller of the two current clusters is
        cleared and its line yielded. Once one file runs out, the line still
        pending from the other file (if any) is yielded, followed by the rest
        of that file.
        """
        safe_reader = SafeReader(self.logger)
        # None means "no line read from this file yet"; this covers the case
        # where the first file is empty and nothing is pending from the second.
        line_a = line_b = None
        a_ran_out = True  # which file raised StopIteration: a (True) or b (False)
        try:
            while True:
                # An empty cluster is falsy here: it means its line was already
                # yielded (or nothing was read yet), so fetch the next one.
                if not self.current_a:
                    a_ran_out = True
                    line_a = next(self.file_a)
                    safe_reader.safe_read_line(self.current_a, line_a)

                if not self.current_b:
                    a_ran_out = False
                    line_b = next(self.file_b)
                    safe_reader.safe_read_line(self.current_b, line_b)

                if self.current_a < self.current_b:
                    self.current_a.clear()
                    yield line_a
                else:
                    self.current_b.clear()
                    yield line_b
        except StopIteration:
            # One file is exhausted; the remainder of the other is emitted below.
            pass

        if a_ran_out:
            pending, remainder = line_b, self.file_b
        else:
            pending, remainder = line_a, self.file_a

        if pending is not None:
            yield pending
        # A plain for loop ends cleanly at end of file instead of letting
        # StopIteration escape from the generator body.
        for line in remainder:
            yield line
コード例 #4
0
  def readConf(self):
    '''Read a configuration file (ConfigParser/INI format) and return a list of Clusters.

    The ``[main]`` section's ``clusters`` option is a comma-separated list of
    cluster section names; each cluster section has ``name`` and a
    comma-separated ``servers`` option; each server section has ``ip``,
    ``port`` and optionally ``secure``, ``modealt`` (both default to false)
    and a comma-separated ``vhosts`` option; each vhost section has ``name``
    and ``burl``.

    A server whose ``vhosts`` option is missing or blank gets a single default
    vhost (``add_vhost('')``).

    Returns:
      A list of Cluster objects, each with its Server objects appended to
      ``cluster.servers``.
    '''
    config = CP.ConfigParser({'secure': 'false', 'modealt': 'false', })
    config.read(self.filename)
    result = []

    clusters = config.get('main', 'clusters').split(',')

    for c in clusters:
      cluster = Cluster()
      cluster.name = config.get(c, 'name')

      for s in config.get(c, 'servers').split(','):
        srv = Server()
        srv.ip = config.get(s, 'ip')
        srv.port = config.get(s, 'port')
        srv.secure =  config.getboolean(s, 'secure')
        srv.modealt = config.getboolean(s, 'modealt')

        try:
          raw_vhosts = config.get(s, 'vhosts')
        except CP.NoOptionError:
          raw_vhosts = ''

        # str.split(',') never returns an empty list ('' -> ['']), so blank
        # entries are dropped explicitly to detect "no vhost configured".
        vhosts = [vh for vh in raw_vhosts.split(',') if vh.strip()]

        if not vhosts:
          srv.add_vhost('')
        for vh in vhosts:
          vhost_name = config.get(vh, 'name')
          vhost_burl = config.get(vh, 'burl')
          srv.add_vhost(vhost_name, vhost_burl)

        cluster.servers.append(srv)

      ## Appending cluster object to returned result
      result.append(cluster)
    return result
コード例 #5
0
ファイル: utils.py プロジェクト: 23Skidoo/genomizer-server
    def _read_next_tag(self):
        """Read the next line of the file into ``self.current_tag``.

        Returns True when there is nothing left to read (``readline`` returned
        an empty string or raised StopIteration), telling the caller to halt.
        Otherwise a fresh Cluster is stored in ``self.current_tag``, the line is
        parsed into it through ``safe_read_line`` and False is returned.
        """
        try:
            raw_line = self.file_iterator.readline()
        except StopIteration:
            # Treated exactly like reaching the end of the file.
            raw_line = ''

        if raw_line == '':
            return True

        tag = Cluster(read=self.experiment_format, read_half_open=self.read_half_open, rounding=self.rounding, cached=False, logger=self.logger)
        self.current_tag = tag
        self.safe_read_line(tag, raw_line)
        return False
コード例 #6
0
ファイル: utils.py プロジェクト: 23Skidoo/genomizer-server
    def __init__(self, file_format, read_half_open=False, frag_size=0, id=0, logger=True, filter_chunks=True, push_distance=0, buffer_size = 320000, temp_file_size = 8000000):
        """Store the sorting options and, when a format is given, build the working Cluster.

        If ``Cluster`` rejects the format with ConversionNotSupported, the
        problem is reported through ``logger.error`` and the available formats
        are listed; in that case ``self.cluster`` is left unset.
        """
        self.logger = logger
        self.file_format = file_format
        self.filter_chunks = filter_chunks
        self.frag_size = frag_size
        self.push_distance = push_distance
        self.buffer_size = buffer_size
        self.temp_file_size = temp_file_size

        if self.file_format:
            # The same format and half-open setting are used for reading and writing.
            try:
                self.cluster = Cluster(read=self.file_format, write=self.file_format, read_half_open=read_half_open, write_half_open=read_half_open, logger=self.logger)
            except ConversionNotSupported:
                self.logger.error('')
                self.logger.error('Reading "%s" is not supported (unknown format).\n'%self.file_format)
                list_available_formats()

        self.id = id
コード例 #7
0
    def get_overlaping_counts(self, region, overlap=1):
        """Count the tags in the file that overlap ``region``.

        Reading restarts from the position saved in ``self.slow_seek``. Tags
        that sort before the region are skipped, with ``slow_seek`` updated to
        the file position before each read. Then every tag on the same ``name``
        that starts at or before ``region.end`` is checked: it is counted when
        ``overlap(region)`` is at least ``overlap`` and, if the region has a
        strand, the tag's strand matches it.

        Returns the number of counted tags; reaching the end of the file at any
        point returns the count gathered so far.
        """
        counts = 0
        # load last seek
        self.file_iterator.seek(self.slow_seek)
        self.current_tag = Cluster()

        # advance slow seek
        while True:
            tag = self.current_tag
            tag_is_behind = (tag.name < region.name) or (tag.name == region.name and region.start > tag.end)
            if not tag_is_behind:
                break
            self.slow_seek = self.file_iterator.tell()
            if self._read_next_tag():
                return counts

        # get intersections
        while self.current_tag.start <= region.end and self.current_tag.name == region.name:
            enough_overlap = self.current_tag.overlap(region) >= overlap
            if enough_overlap and (not region.strand or region.strand == self.current_tag.strand):
                counts += 1

            if self._read_next_tag():
                break

        return counts
コード例 #8
0
ファイル: utils.py プロジェクト: 23Skidoo/genomizer-server
 def __initvalues(self):
     """Reset the reading state: saved seek position 0 and an empty current tag."""
     self.slow_seek = 0
     empty_tag = Cluster()
     self.current_tag = empty_tag
コード例 #9
0
ファイル: utils.py プロジェクト: 23Skidoo/genomizer-server
class SortedFileCountReader:
    """
    Holds a cursor and a file path. Given a start and an end, it iterates through the file starting on the cursor position,
    and retrieves the *counts* (number of reads) that overlap with the region specified. Because this class doesn't store the reads, but only counts them, 
    it doesn't have memory problems when encountering huge clusters of reads.  
    """
    def __init__(self, file_path, experiment_format, read_half_open=False, rounding=True, cached=True, logger=None):
        """Open ``file_path`` and reset the cursor.

        Args:
            file_path: path of the sorted file to read.
            experiment_format: format name, passed to ``open_file`` and to every
                Cluster built while reading.
            read_half_open: forwarded to each Cluster.
            rounding: forwarded to each Cluster.
            cached: stored on the instance; not used by the methods of this class.
            logger: optional logger; when set, the chosen reader is logged at debug level.
        """
        # Explicit attributes instead of ``self.__dict__.update(locals())``,
        # which also stored ``self`` on the instance and so created a
        # reference cycle that kept the open file alive longer than needed.
        self.file_path = file_path
        self.experiment_format = experiment_format
        self.read_half_open = read_half_open
        self.rounding = rounding
        self.cached = cached
        self.logger = logger
        self.file_iterator = open_file(file_path, format=experiment_format, logger=logger)
        if logger:
            self.logger.debug('Fetcher used for %s: Sequential Sorted Counts Reader'%file_path)
        self.safe_reader = SafeReader(logger=logger)
        self.__initvalues()        
    
    def rewind(self):
        """Start again reading the file from the start"""
        self.file_iterator.seek(0)
        self.__initvalues()
        
    def __initvalues(self):
        """Reset the reading state: saved seek position 0 and an empty current tag."""
        self.slow_seek = 0
        self.current_tag = Cluster()

    def _read_next_tag(self):
        """Read the next line of the file into ``self.current_tag``.

        Returns True when there is nothing left to read (``readline`` returned
        an empty string or raised StopIteration), telling the caller to halt.
        Otherwise a fresh Cluster is stored in ``self.current_tag``, the line is
        parsed into it through ``safe_read_line`` and False is returned.
        """
        try:
            line = self.file_iterator.readline()
        except StopIteration:
            return True

        if line == '':
            return True

        self.current_tag = Cluster(read=self.experiment_format, read_half_open=self.read_half_open, rounding=self.rounding, cached=False, logger=self.logger)
        self.safe_read_line(self.current_tag, line)        
        return False

    def get_overlaping_counts(self, region, overlap=1):
        """Count the tags in the file that overlap ``region``.

        Reading restarts from the position saved in ``self.slow_seek``. Tags
        that sort before the region are skipped, with ``slow_seek`` updated to
        the file position before each read. Then every tag on the same ``name``
        that starts at or before ``region.end`` is counted when
        ``overlap(region)`` is at least ``overlap`` and, if the region has a
        strand, the tag's strand matches it.

        Returns the number of counted tags; reaching the end of the file at any
        point returns the count gathered so far.
        """
        counts = 0
        # load last seek
        self.file_iterator.seek(self.slow_seek)
        self.current_tag = Cluster()
        # advance slow seek 
        while (self.current_tag.name < region.name) or (self.current_tag.name == region.name and region.start > self.current_tag.end):     
            self.slow_seek = self.file_iterator.tell()
            if self._read_next_tag():
                return counts  

        # get intersections
        while self.current_tag.start <= region.end and self.current_tag.name == region.name:
            if self.current_tag.overlap(region) >= overlap:
                if not region.strand or region.strand == self.current_tag.strand:
                    counts += 1

            if self._read_next_tag():
                return counts

        return counts

    def safe_read_line(self, cluster, line):
        """Parse ``line`` into ``cluster`` by delegating to the instance's SafeReader."""
        self.safe_reader.safe_read_line(cluster, line)
コード例 #10
0
ファイル: utils.py プロジェクト: 23Skidoo/genomizer-server
 def __init__(self, file_a_path, file_b_path, format, read_half_open=False, logger=None):
     """Open both input files and create one Cluster per file to hold its current line.

     ``format`` and ``read_half_open`` are forwarded to both Cluster objects,
     together with ``logger``.
     """
     self.logger = logger
     self.file_a = open(file_a_path)
     try:
         self.file_b = open(file_b_path)
     except Exception:
         # Do not leak the first handle when the second file cannot be opened.
         self.file_a.close()
         raise
     self.current_a = Cluster(cached=False, read=format, read_half_open=read_half_open, logger=self.logger)
     self.current_b = Cluster(cached=False, read=format, read_half_open=read_half_open, logger=self.logger)
コード例 #11
0
ファイル: utils.py プロジェクト: 23Skidoo/genomizer-server
class BigSort:
    """
    This class can sort huge files without loading them fully into memory.
    Based on a recipe by Tomasz Bieruta.

    The input is read in chunks of ``buffer_size`` lines. Each chunk is
    optionally filtered, sorted in memory and written to a temporary file;
    the sorted chunk files are then merged with a heap into the output file.
    """
    def __init__(self, file_format, read_half_open=False, frag_size=0, id=0, logger=True, filter_chunks=True, push_distance=0, buffer_size = 320000, temp_file_size = 8000000):
        """Store the sorting options and, when a format is given, build the working Cluster.

        Args:
            file_format: format name used both to read and to write lines; when
                falsy no Cluster is created.
            read_half_open: forwarded to Cluster for reading and writing.
            frag_size: when non-zero, each filtered cluster is extended by this value.
            id: value embedded in the temporary file names.
            logger: object used for ``debug``/``info``/``error`` calls.
                NOTE(review): the default ``True`` has none of those methods, so
                callers presumably always pass a real logger or None - confirm.
            filter_chunks: when true, every chunk goes through ``filter_chunk``.
            push_distance: when non-zero, each filtered cluster is pushed by this value.
            buffer_size: number of lines per in-memory chunk.
            temp_file_size: buffering size passed to ``open`` for input, chunk
                and output files.
        """
        self.logger = logger
        self.file_format = file_format
        self.frag_size = frag_size
        self.push_distance = push_distance
        self.buffer_size = buffer_size
        self.temp_file_size = temp_file_size
        self.filter_chunks = filter_chunks
        try:
            if self.file_format:
                self.cluster = Cluster(read=self.file_format, write=self.file_format, read_half_open=read_half_open, write_half_open=read_half_open, logger=self.logger)
        except ConversionNotSupported:
            self.logger.error('')
            self.logger.error('Reading "%s" is not supported (unknown format).\n'%self.file_format)
            list_available_formats()

        self.id = id
        
    def skipHeaderLines(self, key, experiment_file):
        """Position ``experiment_file`` at the first line that ``key`` can process.

        Lines on which ``key`` raises are treated as header lines and skipped.
        At most 400 attempts are made.
        """
        for _ in range(400): #file formats with more than 400 lines of header should die anyway
            try:
                currentPos = experiment_file.tell()
                key(experiment_file.readline())
                # The line is valid: go back so it is read again by the caller.
                experiment_file.seek(currentPos)
                return
            except Exception:
                # Header line (or unusable stream): try the next line.
                continue

    def remove_chunks(self, chunks):
        """Delete the given chunk files, ignoring files that cannot be removed."""
        for chunk in chunks:
            try:
                os.remove(chunk)
            except OSError:
                # Best effort: the file may already be gone.
                pass
    
    def filter_chunk(self, chunk):
        """Return the lines of ``chunk`` re-written through ``self.cluster``.

        Lines rejected by the reader's ``quality_filter`` are dropped. Every
        other line is read into the cluster, extended by ``frag_size`` and
        pushed by ``push_distance`` when those are set, and written back out if
        the cluster is not empty. Lines raising InvalidLine are logged.
        """
        filtered_chunk = []
        for line in chunk:
            if not self.cluster.reader.quality_filter(line):
                continue

            self.cluster.clear()
            try:
                self.cluster.read_line(line)
                if self.frag_size:
                    self.cluster.extend(self.frag_size)

                if self.push_distance:
                    self.cluster.push(self.push_distance)

            except InvalidLine:
                if self.logger: self.logger.debug('Discarding middle invalid line: %s'%line)

            if not self.cluster.is_empty():
                filtered_chunk.append(self.cluster.write_line())

        return filtered_chunk

    def sort(self, input, output=None, key=None, tempdirs=None):
        """Sort the file ``input`` and return the sorted file opened for reading.

        Args:
            input: path of the file to sort.
            output: path of the sorted file; defaults to a ``tempsort`` file in
                the first temporary directory.
            key: sort key applied to each line; defaults to
                ``sorting_lambda(self.file_format)``.
            tempdirs: directories used in rotation for the chunk files;
                defaults to the system temporary directory. The caller's list
                is not modified.

        Returns:
            The output file, opened with ``open(output)``.
        """
        if key is None: # unless explicitly specified, sort with the default lambda
            key = sorting_lambda(self.file_format)

        # Work on a private copy so neither a shared default nor the caller's
        # list is mutated.
        tempdirs = list(tempdirs) if tempdirs else [gettempdir()]

        chunks = []
        input_file = open(input,'rb',self.temp_file_size)
        try:
            self.skipHeaderLines(key, input_file)
            input_iterator = iter(input_file)
            for tempdir in cycle(tempdirs):
                raw_chunk = list(islice(input_iterator, self.buffer_size))
                if not raw_chunk:
                    break  # end of input

                if self.filter_chunks:
                    current_chunk = self.filter_chunk(raw_chunk)
                    if not current_chunk:
                        # Everything in this chunk was filtered out; more
                        # input may still follow, so keep reading.
                        continue
                else:
                    current_chunk = raw_chunk

                if self.logger: self.logger.debug("Chunk: len current_chunk: %s chunks: %s temp_file_size %s buffer_size %s"%(len(current_chunk), len(chunks), self.temp_file_size, self.buffer_size))
                current_chunk.sort(key=key)
                chunk_path = os.path.join(tempdir,'%06i_%s_%s'%(len(chunks), os.getpid(), self.id))
                output_chunk = open(chunk_path,'w+b',self.temp_file_size)
                # Register the path before writing so a failed write is still cleaned up.
                chunks.append(chunk_path)
                try:
                    output_chunk.writelines(current_chunk)
                finally:
                    output_chunk.close()

        except KeyboardInterrupt: # If there is an interruption, delete all temporary files and raise the exception for further processing.
            print('Removing temporary files...')
            self.remove_chunks(chunks)
            raise

        except Exception:
            # Do not leave chunk files behind on any other failure either.
            self.remove_chunks(chunks)
            raise

        finally:
            input_file.close()
        
        if output is None:       
            output = "%s/tempsort%s_%s"%(tempdirs[0], os.getpid(), self.id)
        
        output_file = open(output,'wb',self.temp_file_size)
        try:
            output_file.writelines(self.merge(chunks,key))
        finally:
            output_file.close()
            self.remove_chunks(chunks)

        return open(output)

    def merge(self, chunks, key=None):
        """Yield the lines of the sorted chunk files in globally sorted order.

        Args:
            chunks: paths of the sorted chunk files.
            key: sort key used when the chunks were sorted; identity by default.

        The chunk files are closed and deleted once the merge finishes or the
        generator is closed.
        """
        if self.logger: self.logger.info("... Merging chunks...")
        if key is None:
            key = lambda x : x

        values = []
        opened_files = []
        try:
            for index, chunk in enumerate(chunks):
                # Chunks are written in binary mode, so read them back the same way.
                chunk_file = open(chunk, 'rb')
                opened_files.append(chunk_file)
                iterator = iter(chunk_file)
                try:
                    value = next(iterator)
                except StopIteration:
                    continue  # empty chunk: nothing to merge from it
                # ``index`` breaks ties between equal keys, so the remaining
                # tuple members are never compared.
                heappush(values, (key(value), index, value, iterator))

            while values:
                _, index, value, iterator = heappop(values)
                yield value
                try:
                    value = next(iterator)
                except StopIteration:
                    continue  # this chunk is exhausted
                heappush(values, (key(value), index, value, iterator))
        finally:
            for chunk_file in opened_files:
                chunk_file.close()
            self.remove_chunks(chunks)