Example #1
0
 def _doc_finder(self, stream):
     """Generate the running offsets of document boundaries in *stream*.

     Repeatedly searches *stream* for ``DocumentEndEvent`` boundaries via
     :meth:`_event_finder`, yielding the cumulative offset after each
     document and reporting progress through ``write``.

     Arguments:
         stream (str): the raw yaml text to scan.

     Yields:
         int: character offset just past each document found so far;
         the final yielded value equals ``len(stream)``.
     """
     # Offset of the first character not yet consumed.
     startpos = 0
     write('Loading...')
     while startpos < len(stream):
         try:
             startpos += self._event_finder(stream[startpos:],
                                            yaml.events.DocumentEndEvent)
         except Exception:
             # A malformed tail cannot be parsed further: consume the
             # whole remaining stream so the loop terminates.
             # (Was a bare ``except:``, which also swallowed
             # KeyboardInterrupt/SystemExit; narrowed to Exception.)
             startpos = len(stream)
         write(int((100.0 * startpos) / len(stream)), '%')
         yield startpos
Example #2
0
 def _event_finder(self, present, event, end=False):
     """Locate the first occurrence of *event* in the yaml text *present*.

     Arguments:
         present (str): yaml text to parse.
         event: the yaml event class to look for.
         end (bool): when True, also extract the block opened by the
             event and return its key, content and end offset.

     Returns:
         * ``end=False``: the character offset just past the event, or
           ``len(present)`` if the event never occurs;
         * ``end=True``: a ``(key, substream, endpos)`` tuple, or
           ``('nothing', '', len(present))`` if the event never occurs.
     """
     for parsed in yaml.parse(present, Loader=yaml.CLoader):
         if not isinstance(parsed, event):
             continue
         if not end:
             # Only the position just past the event is needed.
             return parsed.end_mark.index
         key = parsed.value
         block_start = parsed.start_mark.index
         # Delimit the block that the event opens.
         endpos = block_start + self._find_endblock(present[block_start:])
         substream = present[parsed.end_mark.index:endpos]
         return key, substream, endpos
     # The event never appeared in the stream.
     if end:
         write('here', present, 'end')
         return 'nothing', '', len(present)
     return len(present)
Example #3
0
 def save(self):
     """Persist the current data under the object's filename via ``write``."""
     target = self.filename
     payload = self.data
     write(target, payload)
Example #4
0
def clean_logfile(logfile_lines, to_remove):
    """Remove yaml fields from a list of lines.

  Removes from a set of lines the yaml_fields contained in the to_remove list.

  Arguments:
      logfile_lines (list): list of the lines of the logfile. Generated from a file by e.g. :py:meth:`~io.IOBase.readlines`.
      to_remove (list): list of keys to remove from logfile_lines

  Returns:
      list of lines where the removed keys have as values the `"<folded>"` string
  """
    line_rev = logfile_lines  #list of the lines of the logfile
    #loop in the reversed from (such as to parse by blocks)
    extra_lines = 20  #internal variable to be customized
    line_rev.reverse()
    #clean the log
    cleaned_logfile = []
    removed = []
    while len(line_rev) > 0:
        line = line_rev.pop()
        to_print = line
        #check if the line contains interesting information
        for remove_it in to_remove:
            stream_list = []
            #line without comments
            valid_line = line.split('#')[0]
            spaces = 'nospace'
            tmp_buf = ''  #prefix of the line before the key (spaces only when the key is valid)
            #control that the string between the key and the semicolon is only spaces
            if remove_it in valid_line and ":" in valid_line:
                starting_point = valid_line.find(remove_it)
                tmp_buf = valid_line[:starting_point]
                #find the closest comma to the starting point, if exists;
                #work on the reversed prefix so that find() locates the nearest one
                tmp_buf = tmp_buf[::-1]
                starting_comma = tmp_buf.find(',')
                #BUGFIX: 'st' used to be assigned only when no comma was found,
                #raising NameError (or reusing a stale value) otherwise; keep
                #only the text between the comma and the key
                if starting_comma >= 0:
                    tmp_buf = tmp_buf[:starting_comma]
                tmp_buf = tmp_buf[::-1]
                tmp_buf = tmp_buf.strip(' ')
                valid_line = valid_line[starting_point + len(remove_it):]
                spaces = valid_line[1:valid_line.find(':')]
            if len(spaces.strip(' ')) == 0 and len(
                    tmp_buf) == 0:  #this means that the key has been found
                #creates a new Yaml document starting from the line
                #treat the rest of the line following the key to be removed
                header = ''.join(line.split(':')[1:])
                header = header.rstrip() + '\n'
                #eliminate the anchor
                header = header.lstrip(' ')
                header = header.lstrip('*')
                if len(header) > 0:
                    stream_list.append(header)
                #part to be printed, updated
                to_print = line.split(':')[0] + ": <folded> \n"

                #then check when the mapping will end:
                while True:
                    #create a stream with extra_lines block
                    for _ in range(0, min(extra_lines, len(line_rev))):
                        stream_list.append(line_rev.pop())
                    #create a stream to be parsed
                    stream = ''.join(stream_list)
                    #last position reached by the parser
                    #(0 if it fails on the very first event)
                    endpos = 0
                    #then parse the stream until the last valid position has been found
                    try:
                        for i in yaml.parse(stream, Loader=yaml.CLoader):
                            endpos = i.end_mark.index
                        #BUGFIX: the folded block parsed cleanly; if no more
                        #lines are available the original code spun forever here
                        if len(line_rev) == 0:
                            break
                    except Exception:
                        #BUGFIX: was ``except Exception(e):``, which raises a
                        #NameError at catch time instead of handling the error
                        #convert back the valid stream into a list
                        #if needed the stream can be loaded into a document
                        item_list = stream[:endpos].split('\n')
                        #if lengths are different there is no need to add lines
                        if len(item_list) != len(stream_list):
                            #last line might be shorter, therefore treat it separately
                            last_line = item_list.pop()
                            #purge the stream
                            for item in item_list:
                                stream_list.remove(item + '\n')
                            #extract the remaining line which should be compared with the last one
                            strip_size = len(last_line.rstrip())
                            if strip_size > 0:
                                first_line = stream_list.pop(0)[strip_size:]
                                if '*' in first_line or '&' in first_line:
                                    first_line = ''  #eliminate anchors
                            else:
                                first_line = ''
                            #then put the rest in the line to be treated
                            #BUGFIX: rstrip returns a new string; the result
                            #was previously discarded (no-op)
                            to_print = to_print.rstrip('\n')
                            to_print += first_line + '\n'
                            # the item has been found
                            break
                        #BUGFIX: nothing left to pull in and the lengths match:
                        #the stream cannot change any more, so stop retrying
                        if len(line_rev) == 0:
                            break
                stream_list.reverse()
                #put back the unused part in the document
                line_rev.extend(stream_list)
                # mark that the key has been removed
                if (remove_it not in removed):
                    removed.append(remove_it)
                    write('removed: ', remove_it)
        # then print out the line
        cleaned_logfile.append(to_print)

    # check that everything has been removed, at least once
    if (set(removed) != set(to_remove)):
        write('WARNING, not all the requested items have been removed!')
        write('To_remove : ', to_remove)
        write('removed   : ', removed)
        write('Difference: ', list(set(to_remove) - set(removed)))
    return cleaned_logfile
Example #5
0
def load(file=None, stream=None, doc_lists=True, safe_mode=False):
    """Encapsulate the loading of yaml documents.
    
    Provides a dictionary, or a list of dictionaries, which 
    represents the structure of the stream to be loaded.
    It also wraps the yaml loader to perform a optimized parsing when the
    `minloader` of PyYaml 3.13 is available.
    This wrapper ensures to extract from the stream the maximum possible information
    by choosing the best loader available.

    Arguments:
        file (str): path of the yaml-compliant file containing the stream to be loaded

        stream (str): the stream to load, overrides the ``file`` argument if present

        doc_lists (bool): if True, ensures that the results is always in a form 
           of lists of documents, even in the case of a single doc
           When False, the return type is either a dictionary or a generator according
           to the specifications of yaml.load and yaml.load_all respectively.

        safe_mode (bool): When true, in the case of multiple documents 
           in the stream, it loads the document one after another. 
           This is useful to avoid losing of all the document list 
           in the case when one of the document is 
           not yaml compliant, like in the case of a broken logfile. 
           It may works only when the separation of the 
           documents is indicated by the usual syntax ``"---\\n"`` 
           (i.e. no yaml tags between documents)

    Returns:
        * a list of dictionaries, if ``doc_lists`` is set to ``True``;
        * a dictionary, if the stream or the file contains a single yaml document;
        * a generator if the parsed stream is made of multiple documents *and* ``safe_mode`` = ``False``;
        * a list of dictionaries if the stream is made of multiple documents and ``safe_mode`` is ``True``.
    """
    #Detect None otherwise a doc == '' gives an error
    #BUGFIX: ``!= None`` replaced by identity test; the file handle is now
    #closed deterministically instead of leaking until garbage collection
    if stream is not None:
        strm = stream
    else:
        with open(file, 'r') as ifile:
            strm = ifile.read()
    #choose the best loader available: the optimized MinLoader of PyYaml 3.13,
    #then the C-accelerated loader, then the pure-python fallback
    #(was two nested bare ``except:`` clauses)
    ldr = getattr(yaml, 'MinLoader', None)
    if ldr is None:
        ldr = getattr(yaml, 'CLoader', yaml.Loader)

    #load the documents
    #NOTE(review): yaml.load with a non-safe loader must not be fed
    #untrusted input; kept as-is for backward compatibility
    ld = []
    try:
        ld = yaml.load(strm, Loader=ldr)
        if doc_lists:
            ld = [ld]
    except Exception:
        if safe_mode:
            #load each document separately so one broken document does not
            #discard the whole list
            ld = []
            documents = [v for v in strm.split('---\n') if len(v) > 0]
            for i, raw_doc in enumerate(documents):
                try:
                    ld.append(yaml.load(raw_doc, Loader=ldr))
                except Exception as f:
                    write('Document', i, 'of stream NOT loaded, error:', f)
        else:
            ld = yaml.load_all(strm, Loader=ldr)
            if doc_lists:
                ld = list(ld)
    return ld
Example #6
0
 def save(self):
     """Dump the stored data to the associated filename through ``write``."""
     destination = self.filename
     write(destination, self.data)