Example #1
0
def valid_image(filename, no_strict_mrc=False):
    ''' Test if the size of the file agrees with the size implied by its MRC header
    
    :Parameters:
    
        filename : str
                   Input filename to test
        no_strict_mrc : bool
                        Forwarded to `read_mrc_header`; set True to turn off
                        strict MRC header checking (strict checking is
                        recommended - only EPU MRC files and Yifan's frame
                        alignment require it to be off)
    
    :Returns:
        
        flag : bool
               True if image is valid
    '''

    f = util.uopen(filename, 'rb')
    try:
        # Pass the flag by keyword: the second positional parameter of
        # read_mrc_header is `index`, so a positional call drops the flag.
        h = read_mrc_header(f, no_strict_mrc=no_strict_mrc)
        total = file_size(f)
        dtype = numpy.dtype(mrc2numpy[h['mode'][0]])
        n_values = int(h['nx'][0]) * int(h['ny'][0]) * int(h['nz'][0])
        # Data is expected after 1024 bytes plus the 'nsymbt' bytes named in the header
        expected = 1024 + int(h['nsymbt']) + n_values * dtype.itemsize
        return total == expected
    finally:
        util.close(filename, f)
Example #2
0
def valid_image(filename):
    ''' Check that the size of a SPIDER file matches the size implied by its header
    
    :Parameters:
    
        filename : str
                   Input filename to test
    
    :Returns:
        
        flag : bool
               True if the file size equals the size computed from the header
    '''

    f = util.uopen(filename, 'rb')
    try:
        h = read_spider_header(f)
        header_bytes = int(h['labbyt'])
        n_values = int(h['nx']) * int(h['ny']) * int(h['nz'])
        image_bytes = n_values * 4
        n_images = count_images(h)
        if n_images > 1 or h['istack'] == 2:
            # Stack: one leading header, then a header in front of every image
            expected = header_bytes + n_images * (header_bytes + image_bytes)
        else:
            expected = header_bytes + n_images * image_bytes
        return file_size(f) == expected
    finally:
        util.close(filename, f)
Example #3
0
def write_image(filename, img, index=None, header=None, inplace=False):
    ''' Write an image array to a file in the MRC format
    
    :Parameters:
    
    filename : str
               Name of the output file
    img : array
          Image array
    index : int, optional
            Index to write image in the stack; required when `inplace` is True
    header : dict, optional
             Dictionary of header values; if it is not already a format
             header, one is built with `create_header`
    inplace : bool
              Write new image to stack without removing the stack
    '''
    if header is None and hasattr(img, 'header'): header=img.header

    # Build the header before touching the file: a failure here must neither
    # leak an open handle nor truncate an existing file opened with 'wb+'.
    if header is None or not is_format_header(header):
        header = create_header(img.shape, img.dtype, img.order, header)

    mode = 'rb+' if index is not None and (index > 0 or inplace and index > -1) else 'wb+'
    f = util.uopen(filename, mode)
    try:
        if inplace:
            # Skip the header and the preceding images; the header is left untouched
            f.seek(int(header.itemsize+int(header['extended'])+index*img.ravel().shape[0]*img.dtype.itemsize))
        elif f != filename:
            f.seek(0)
            header.tofile(f)
            # index may be None here; only a positive index needs repositioning
            if index is not None and index > 0:
                f.seek(int(header.itemsize+int(header['extended'])+index*img.ravel().shape[0]*img.dtype.itemsize))
        img.tofile(f)
    finally:
        util.close(filename, f)
Example #4
0
def main(args):
    import codecs
    from util import ureader, uwriter, uopen

    def handler(x):
        v = x.object[x.start:x.end]
        print >> stderr, repr(v), v
        return (u'', x.end)

    codecs.register_error('clear', handler)

    if '-t' not in args:
        usage(args)

    tag = map(string.lower, args[1 + args.index('-t')].split(','))
    enc = args[1 + args.index('-e')] if '-e' in args else 'utf8'
    stdin = ureader(sys.stdin) if '-i' not in args else uopen(
        args[1 + args.index('-i')])
    #     stdout = codecs.getwriter(enc)(sys.stdout if '-o' not in args else open(args[1 + args.index('-o')], 'wb'), errors='clear')
    stdout = codecs.getwriter(enc)(
        sys.stdout if '-o' not in args else open(args[1 +
                                                      args.index('-o')], 'wb'))
    stderr = uwriter(sys.stderr)
    for l in strip(stdin.read(), keep=tag):
        try:
            print >> stdout, l
        except UnicodeDecodeError:
            print 'problem with', l
Example #5
0
def read_image(filename, index=None, header=None, cache=None):
    ''' Read a single image from a file in the WEB format
    
    :Parameters:
    
    filename : str or file object
               Filename or open stream for a file
    index : int, optional
            Index of image to get, if None, first image (Default: None)
    header : dict, optional
             Output dictionary to place header values (currently not filled in)
    cache : object, optional
            Not used by this function
    
    :Returns:
        
    out : array
          Array with image information from the file
    '''
    
    idx = index if index is not None else 0
    f = util.uopen(filename, 'rb')
    try:
        h = read_web_header(f)
        if idx >= count_images(h):
            raise IOError("Index exceeds number of images in stack: %d < %d"%(idx, count_images(h)))
        offset, ar_args = array_from_header(h)
        # Jump over the `idx` images stored before the requested one
        skipped = idx * ar_args[1] * ar_args[0].itemsize
        f.seek(offset + skipped)
        out = util.read_image(f, *ar_args)
    finally:
        util.close(filename, f)
    return out
Example #6
0
def iter_images(filename, index=None, header=None):
    ''' Iterate over a set of images in a WEB file
    
    :Parameters:
    
    filename : str or file object
               Filename or open stream for a file
    index : int or sequence of int, optional
            Index of image to start, if None, start with the first image
            (Default: None); a sequence selects exactly those images
    header : dict, optional
             Output dictionary to place header values (currently not filled in)
    
    :Returns:
        
    out : array
          Array with image information from the file (one per iteration)
    '''
    
    f = util.uopen(filename, 'rb')
    if index is None: index = 0
    try:
        h = read_web_header(f)
        count = count_images(h)
        offset, ar_args = array_from_header(h)
        if not hasattr(index, '__iter__'): index = xrange(index, count)
        # asarray also accepts plain lists and tuples, which have no astype
        else: index = numpy.asarray(index, dtype=int)
        for i in index:
            # Position on image i explicitly; reading sequentially from the
            # start of the data would ignore the requested indices.
            f.seek(int(offset + i * ar_args[1] * ar_args[0].itemsize))
            yield util.read_image(f, *ar_args)
    finally:
        util.close(filename, f)
Example #7
0
def test_m():
    ''' Build a MetaphorBuilder for English and pretty-print what it finds for two sample relations '''
    from util import read_seed, N, V

    def tag_ext(tag):
        # Returns a function that tags the words with `tag` and then extends them
        def apply(words):
            return extended(tagged(words, tag))
        return apply

    lang = 'en'
    rels = [(u'1', (u'debt', 'n'), (u'kill', 'v')),
            (u'1', (u'poverty', 'n'), (u'hurl', 'v'))]
    seed_path = env('{SEED_DIR}/{LANG}/{SEEDS}', SEEDS='seeds.ei', LANG=lang)
    seeds = read_seed(uopen(seed_path))
    noun_file, verb_file = cluster[lang]

    nclusters = read_clusters(uopen(noun_file), tag_ext(N))
    vclusters = read_clusters(uopen(verb_file), tag_ext(V))
    builder = MetaphorBuilder(lang, nclusters, vclusters, seeds)
    pprint(builder.find(rels))
Example #8
0
def read_web_header(filename, index=None):
    ''' Read the WEB header and, when present, the metadata records that follow it
    
    :Parameters:
    
    filename : str or file object
               Filename or open stream for a file
    index : int, ignored
            Index of image to get the header, if None, the stack header (Default: None)
    
    :Returns:
        
    h : array
        Array with header information in the file
    m : array or None
        Metadata records read after the header; None if the header is not
        tagged 'WEBMETADATA' or the extended size is not a whole number of
        records
    
    :Raises:
    
    IOError
        If the header is not readable in either byte order
    '''
    
    f = util.uopen(filename, 'rb')
    m=None
    try:
        h = numpy.fromfile(f, dtype=header_dtype, count=1)
        # Retry with swapped byte order before giving up
        if not is_readable(h): h = h.byteswap().newbyteorder()
        if not is_readable(h): raise IOError("Not an WEB file")
        if h['extended_ident'] == 'WEBMETADATA':
            # Floor division keeps the record count an integer under true division too
            count = int(h['extended'][0] // metadata_dtype.itemsize)
            if (count*metadata_dtype.itemsize) != h['extended'][0]:
                _logger.warning("Unable to read metadata - size mismatch: %d *%d = %d != %d"%(count, metadata_dtype.itemsize, (count*metadata_dtype.itemsize), h['extended'][0]))
            else:
                m = numpy.fromfile(f, dtype=metadata_dtype, count=count)
    finally:
        util.close(filename, f)
    return h, m
Example #9
0
def valid_image(filename):
    ''' Check that the size of a SPIDER file matches the size implied by its header
    
    :Parameters:
    
        filename : str
                   Input filename to test
    
    :Returns:
        
        flag : bool
               True if the file size equals the size computed from the header
    '''
    
    f = util.uopen(filename, 'rb')
    try:
        h = read_spider_header(f)
        h_len = int(h['labbyt'])
        i_len = int(h['nx']) * int(h['ny']) * int(h['nz']) * 4
        count = count_images(h)
        stacked = count > 1 or h['istack'] == 2
        # In a stack every image carries its own header in addition to the leading one
        per_image = (h_len + i_len) if stacked else i_len
        return file_size(f) == (h_len + count * per_image)
    finally:
        util.close(filename, f)
Example #10
0
def read_mrc_header(filename, index=None, no_strict_mrc=False):
    ''' Read the header record of an MRC file
    
    :Parameters:
    
    filename : str or file object
               Filename or open stream for a file
    index : int, ignored
            Index of image to get the header, if None, the stack header (Default: None)
    no_strict_mrc : bool
                    Passed to `is_readable`; strict MRC header checking is
                    recommended - only EPU MRC files and Yifan's frame
                    alignment require it to be off.
    
    :Returns:
        
    out : array
          Array with header information in the file
    
    :Raises:
    
    IOError
        If the header is not readable in either byte order
    '''

    f = util.uopen(filename, 'rb')
    try:
        header = util.fromfile(f, dtype=header_image_dtype, count=1)
        if not is_readable(header, no_strict_mrc):
            # Retry with the same bytes interpreted in the opposite byte order
            header = header.newbyteorder()
        if not is_readable(header, no_strict_mrc):
            raise IOError("Not MRC header")
        return header
    finally:
        util.close(filename, f)
Example #11
0
def main(args):
    # 1. Read in the seed file
    seeds = rseed(uopen(args[1 + args.index('-i')]))
#     outs = uopen(args[1 + args.index('-o')], mode='w+')
    
    for s in seeds: 
        print s
        pprint(['%s.%s' % (l.name, l.synset.pos) for l in derivations(s.noun)])
        pprint(['%s.%s' % (l.name, l.synset.pos) for l in derivations(s.verb)])
Example #12
0
    def __init__(self, lang, seed_fname, extend_seeds):
        ''' Read the seeds and the noun/verb clusters for `lang` and store a MetaphorBuilder as `mbuilder`
        
        :Parameters:
        
        lang : str
               Language passed to `cluster` and to the MetaphorBuilder
        seed_fname : str
                     Name of the seed file
        extend_seeds : bool
                       If true, tagged cluster words are also passed through `extended`
        '''
        def fields(lines):
            # Lazily split each right-stripped line on whitespace
            return (l.rstrip().split() for l in lines)

        def tagger(pos):
            if extend_seeds:
                return lambda words: extended(tagged(words, pos))
            return lambda words: tagged(words, pos)

        noun_fn, verb_fn = cluster(lang)
        with uopen(seed_fname) as lines:
            seeds = read_seed(fields(lines))
        with uopen(noun_fn) as nlines, uopen(verb_fn) as vlines:
            nclusters = read_clusters(fields(nlines), tagger(N))
            vclusters = read_clusters(fields(vlines), tagger(V))
        builder = MetaphorBuilder(lang, nclusters, vclusters, seeds)
        update(self, mbuilder=builder)
Example #13
0
def test_1():
    ''' Translate the dependency and sentence files named on the command line and dump the result to `uout`
    
    Expects four command line arguments: parser, base, dependency file and sentence file.
    '''
    parser, base, depn, sentn = sys.argv[1:5]
    # The dependency file is opened with the plain built-in open, the sentence file with uopen
    with open(depn) as depf, uopen(sentn) as sentf:
        deps = [line.rstrip() for line in depf]
        sents = [line.rstrip() for line in sentf]
        translated = translate(deps, sents, int(base), parser, None, None)
        dump(translated, uout, ensure_ascii=False, encoding='utf-8', indent=2)
Example #14
0
def scatter(lines, name, target_dir, files_per_dir, chunk_size, ext='ss'):
    ''' Distribute lines over numbered files grouped into numbered directories
    
    :Parameters:
    
    lines : iterable of str
            Encoded (utf8) lines to write
    name : str
           Not used by this function
    target_dir : str
                 Directory under which the numbered directories are created
    files_per_dir : int
                    Number of files per directory
    chunk_size : int
                 Number of lines per file
    ext : str
          Extension of the written files (Default: 'ss')
    '''
    lines_per_dir = files_per_dir * chunk_size
    # Index into the (counter, line) pair instead of unpacking it in the
    # lambda signature; tuple parameters are not valid in newer Python.
    line_groups = groupby(enumerate(lines),
                          lambda pair: int(pair[0] // lines_per_dir))
    for d, enum_lines in line_groups:
        dirname = join(target_dir, '%.2x' % d)
        if not exists(dirname):
            makedirs(dirname)
        # The file number is computed from the overall line counter, so it
        # keeps increasing across directories
        file_groups = groupby(enum_lines,
                              lambda pair: int(pair[0] // chunk_size))
        for f, enum_lines_in_file in file_groups:
            target = join(dirname, '%.4x.%s' % (f, ext))
            with uopen(target, 'w') as outs:
                print('writing', target)
                outs.writelines(l.decode('utf8') for _, l in enum_lines_in_file)
Example #15
0
def main(args):
    ''' Either list all metaphors (debug mode) or run `m4detect` on a JSON file
    
    :Parameters:
    
    args : object
           Parsed options; `debug_meta`, `json_fn` and `out_fn` are read here
           and the complete attribute dictionary is forwarded as keyword arguments
    '''
    if args.debug_meta:
        entries = args.__dict__
        header = 'Metaphors for language {lang}, seed file {seed_fn}:'
        print(header.format(**entries))
        for n, v in all_metaphors(**entries):
            line = u'{0[0]}.{0[1]} {1[0]}.{1[1]}'.format(n, v)
            print(line, file=uwriter(sys.stdout))
        return

    with open_file(args.json_fn) as jsonf:
        json_in = json.load(fp=jsonf, encoding='utf-8')
        json_out = m4detect(json_in=json_in, **args.__dict__)
        # '-' selects standard output instead of a named file
        if args.out_fn == '-':
            json_dump(obj=json_out, fp=uwriter(sys.stdout))
        else:
            with uopen(args.out_fn, mode='w+b') as out_f:
                json_dump(obj=json_out, fp=out_f)
Example #16
0
def read_spider_header(filename, index=None):
    ''' Read a header from a SPIDER file
    
    :Parameters:
    
    filename : str or file object
               Filename or open stream for a file
    index : int, optional
            Index of image to get the header, if None, the stack header (Default: None)
    
    :Returns:
        
    out : array
          Array with header information in the file
    
    :Raises:
    
    IOError
        If the header is not readable in either byte order or `index` is
        not below the number of images
    '''

    f = util.uopen(filename, 'rb')
    try:
        h = numpy.fromfile(f, dtype=header_dtype, count=1)
        if not is_readable(h):
            # Retry with the same bytes interpreted in the opposite byte order
            h = h.newbyteorder()
        if not is_readable(h):
            raise IOError("Not a SPIDER file")
        if index is None:
            return h

        h_len = int(h['labbyt'])
        i_len = int(h['nx']) * int(h['ny']) * int(h['nz']) * 4
        count = max(int(h['istack']), 1)
        if index >= count:
            raise IOError("Index exceeds number of images in stack: %d < %d" % (index, count))

        if int(h['istack']) > 0:
            # Skip the leading header and the `index` header+image pairs before it
            offset = h_len + index * (h_len + i_len)
        else:
            offset = 0

        try:
            f.seek(offset)
        except:
            _logger.error("Offset: %s" % str(offset))
            raise
        return numpy.fromfile(f, dtype=h.dtype, count=1)
    finally:
        util.close(filename, f)
Example #17
0
def read_spider_header(filename, index=None):
    ''' Read a header from a SPIDER file
    
    :Parameters:
    
    filename : str or file object
               Filename or open stream for a file
    index : int, optional
            Index of image to get the header, if None, the stack header (Default: None)
    
    :Returns:
        
    out : array
          Array with header information in the file
    
    :Raises:
    
    IOError
        If the header is not readable in either byte order or `index` is
        not below the number of images
    '''
    
    f = util.uopen(filename, 'rb')
    try:
        h = numpy.fromfile(f, dtype=header_dtype, count=1)
        if not is_readable(h):
            # Retry with the same bytes interpreted in the opposite byte order
            h = h.newbyteorder()
        if not is_readable(h):
            raise IOError("Not a SPIDER file")
        if index is None:
            return h
        
        h_len = int(h['labbyt'])
        i_len = int(h['nx']) * int(h['ny']) * int(h['nz']) * 4
        count = max(int(h['istack']), 1)
        if index >= count:
            raise IOError("Index exceeds number of images in stack: %d < %d"%(index, count))
        
        if int(h['istack']) > 0:
            # Skip the leading header and the `index` header+image pairs before it
            offset = h_len + index * (h_len+i_len)
        else:
            offset = 0
        
        try:
            f.seek(offset)
        except:
            _logger.error("Offset: %s"%str(offset))
            raise
        return numpy.fromfile(f, dtype=h.dtype, count=1)
    finally:
        util.close(filename, f)
Example #18
0
def valid_image(filename):
    ''' Check that the size of a WEB file matches the size implied by its header
    
    :Parameters:
    
        filename : str
                   Input filename to test
    
    :Returns:
        
        flag : bool
               True if the file size equals the data offset plus the size of
               all images counted in the header
    '''
    
    f = util.uopen(filename, 'rb')
    try:
        h = read_web_header(f)
        offset, ar_args = array_from_header(h)
        data_bytes = h['count'] * ar_args[1] * ar_args[0].itemsize
        expected = offset + data_bytes
        return file_size(f) == expected
    finally:
        util.close(filename, f)
Example #19
0
def write_image(filename, img, index=None, header=None, inplace=False):
    ''' Write an image array to a file in the SPIDER format
    
    :Parameters:
    
    filename : str
               Name of the output file
    img : array
          Image array; converted to complex64 if complex, float32 otherwise
    index : int, optional
            Index to write image in the stack; required when `inplace` is True
    header : dict, optional
             Dictionary of header values; a SPIDER header is built from it
             unless it already is a format header
    inplace : bool
              Write new image to stack without removing the stack
    
    :Raises:
    
    TypeError
        If the image cannot be converted to a supported type
    '''

    if header is None and hasattr(img, 'header'): header = img.header
    dtype = numpy.complex64 if numpy.iscomplexobj(img) else numpy.float32
    try:
        img = img.astype(dtype)
    except Exception:
        # Exception rather than a bare except, so an interrupt is not turned into a TypeError
        raise TypeError("Unsupported type for SPIDER writing: %s" % str(
            img.dtype))

    mode = 'rb+' if index is not None and (
        index > 0 or inplace and index > -1) else 'wb+'
    try:
        f = util.uopen(filename, mode)
    except:
        _logger.error("Mode: %s - Index: %s" % (str(mode), str(index)))
        raise
    try:
        if header is None or not hasattr(
                header, 'dtype') or not is_format_header(header):
            h = numpy.zeros(1, header_dtype)
            even = header[
                'fourier_even'] if header is not None and 'fourier_even' in header else None
            util.update_header(h, spi_defaults, ara2spi)
            header = util.update_header(h, header, ara2spi, 'spi')

            # Image size in header
            header['nx'] = img.T.shape[0]
            header['ny'] = img.T.shape[1] if img.ndim > 1 else 1
            header['nz'] = img.T.shape[2] if img.ndim > 2 else 1

            # NOTE(review): 'lenbyt' uses img.shape[0] while 'nx' uses
            # img.T.shape[0]; for 2D input these agree only for square images - confirm
            header['lenbyt'] = img.shape[0] * 4
            # Number of 'lenbyt'-sized records needed to hold 1024 bytes, rounded up
            header['labrec'] = 1024 // int(header['lenbyt'])
            if 1024 % int(header['lenbyt']) != 0:
                header['labrec'] = int(header['labrec']) + 1
            header['labbyt'] = int(header['labrec']) * int(header['lenbyt'])
            header['irec'] = header['labrec'] + header['nx']

            if numpy.iscomplexobj(img):
                # determine even or odd Fourier - assumes other dim are padded appropriately
                if even is None:
                    v = int(round(float(img.shape[1]) / img.shape[0]))
                    v = img.shape[1] // v
                    even = (v % 2) == 0
                if even:
                    header['iform'] = -22 if img.ndim == 3 else -12
                else:
                    header['iform'] = -21 if img.ndim == 3 else -11
            else:
                header['iform'] = 3 if img.ndim == 3 else 1

        # Computed after the branch so that they are also defined when a
        # ready-made format header was passed in
        imgsize = img.ravel().shape[0] * 4
        headsize = int(header['labbyt'])

        fheader = numpy.zeros(headsize // 4, dtype=numpy.float32)
        for name, idx in _header_map.items():
            fheader[idx - 1] = float(header[name])

        if inplace:
            # Skip the stack header, the preceding header+image pairs and this image's header
            f.seek(index * (imgsize + headsize) + headsize + headsize)
        else:
            if index is not None:
                # First rewrite the stack header at the start of the file ...
                fheader[_header_map['maxim'] - 1] = index + 1
                fheader[_header_map['imgnum'] - 1] = index + 1
                fheader[_header_map['istack'] - 1] = 2

                f.seek(0)
                fheader.tofile(f)
                # ... then write the header of the image itself at its slot
                fheader[_header_map['istack'] - 1] = 0
                f.seek(index * (imgsize + headsize) + headsize)
                fheader[_header_map['maxim'] - 1] = 0
            fheader.tofile(f)
        img.tofile(f)
    finally:
        util.close(filename, f)
Example #20
0
def main(args):
    ''' Read the cluster files of the language given after `-l` and pass them to `out` tagged 'n' and 'v' '''
    lang = args[1 + args.index('-l')]
    clusters = []
    for fname in cluster[lang]:
        clusters.append(read_clusters(uopen(fname)))
    nc, vc = clusters
    out(nc, 'n')
    out(vc, 'v')
Example #21
0
def write_image(filename, img, index=None, header=None, inplace=False):
    ''' Write an image array to a file in the SPIDER format
    
    :Parameters:
    
    filename : str
               Name of the output file
    img : array
          Image array; converted to complex64 if complex, float32 otherwise
    index : int, optional
            Index to write image in the stack; required when `inplace` is True
    header : dict, optional
             Dictionary of header values; a SPIDER header is built from it
             unless it already is a format header
    inplace : bool
              Write new image to stack without removing the stack
    
    :Raises:
    
    TypeError
        If the image cannot be converted to a supported type
    '''
    
    if header is None and hasattr(img, 'header'): header=img.header
    dtype = numpy.complex64 if numpy.iscomplexobj(img) else numpy.float32
    try: img = img.astype(dtype)
    # Exception rather than a bare except, so an interrupt is not turned into a TypeError
    except Exception: raise TypeError("Unsupported type for SPIDER writing: %s"%str(img.dtype))
    
    mode = 'rb+' if index is not None and (index > 0 or inplace and index > -1) else 'wb+'
    try:
        f = util.uopen(filename, mode)
    except:
        _logger.error("Mode: %s - Index: %s"%(str(mode), str(index)))
        raise
    try:
        if header is None or not hasattr(header, 'dtype') or not is_format_header(header):
            h = numpy.zeros(1, header_dtype)
            even = header['fourier_even'] if header is not None and 'fourier_even' in header else None
            util.update_header(h, spi_defaults, ara2spi)
            header=util.update_header(h, header, ara2spi, 'spi')
            
            # Image size in header
            header['nx'] = img.T.shape[0]
            header['ny'] = img.T.shape[1] if img.ndim > 1 else 1
            header['nz'] = img.T.shape[2] if img.ndim > 2 else 1
            
            # NOTE(review): 'lenbyt' uses img.shape[0] while 'nx' uses
            # img.T.shape[0]; for 2D input these agree only for square images - confirm
            header['lenbyt'] = img.shape[0]*4
            # Number of 'lenbyt'-sized records needed to hold 1024 bytes, rounded up
            header['labrec'] = 1024 // int(header['lenbyt'])
            if 1024%int(header['lenbyt']) != 0: 
                header['labrec'] = int(header['labrec'])+1
            header['labbyt'] = int(header['labrec'] ) * int(header['lenbyt'])
            header['irec'] = header['labrec']+header['nx']
            
            if numpy.iscomplexobj(img):
                # determine even or odd Fourier - assumes other dim are padded appropriately
                if even is None:
                    v = int(round(float(img.shape[1])/img.shape[0]))
                    v = img.shape[1] // v
                    even = (v%2)==0
                if even:
                    header['iform'] = -22  if img.ndim == 3 else -12 
                else:
                    header['iform'] = -21  if img.ndim == 3 else -11 
            else:
                header['iform'] = 3 if img.ndim == 3 else 1 
        
        # Computed after the branch so that they are also defined when a
        # ready-made format header was passed in
        imgsize = img.ravel().shape[0]*4
        headsize = int(header['labbyt'])
        
        fheader = numpy.zeros(headsize // 4, dtype=numpy.float32)
        for name, idx in _header_map.items(): 
            fheader[idx-1]=float(header[name])
        
        if inplace:
            # Skip the stack header, the preceding header+image pairs and this image's header
            f.seek(index * (imgsize + headsize)+headsize+headsize)
        else:
            if index is not None:
                # First rewrite the stack header at the start of the file ...
                fheader[_header_map['maxim']-1] = index+1
                fheader[_header_map['imgnum']-1] = index+1
                fheader[_header_map['istack']-1] = 2
                
                f.seek(0)
                fheader.tofile(f)
                # ... then write the header of the image itself at its slot
                fheader[_header_map['istack']-1] = 0
                f.seek(index * (imgsize + headsize)+headsize)
                fheader[_header_map['maxim']-1] = 0
            fheader.tofile(f)
        img.tofile(f)
    finally:
        util.close(filename, f)
Example #22
0
def iter_images(filename, index=None, header=None, no_strict_mrc=False):
    ''' Iterate over a set of images in an MRC file
    
    :Parameters:
    
    filename : str or file object
               Filename or open stream for a file
    index : int or sequence of int, optional
            Index of image to start, if None, start with the first image
            (Default: None); a sequence selects exactly those images
    header : dict, optional
             Output dictionary to place header values
    no_strict_mrc : bool
                    Forwarded to `read_mrc_header`; set True to turn off
                    strict MRC header checking (strict checking is
                    recommended - only EPU MRC files and Yifan's frame
                    alignment require it to be off)
    
    :Returns:
        
    out : array
          Array with image information from the file (one per iteration)
    
    :Raises:
    
    util.InvalidHeaderException
        If the file size does not match the size implied by the header
    '''

    f = util.uopen(filename, 'rb')
    if index is None: index = 0
    try:
        # Pass the flag by keyword: the second positional parameter of
        # read_mrc_header is `index`, so a positional call drops the flag.
        h = read_mrc_header(f, no_strict_mrc=no_strict_mrc)
        count = count_images(h)
        tmp = read_header(h)
        if header is not None: header.update(tmp)
        # Plain Python ints, so the byte offsets below cannot wrap around in
        # the fixed-width integer type of the header fields
        d_len = int(h['nx'][0]) * int(h['ny'][0])
        dtype = numpy.dtype(mrc2numpy[h['mode'][0]])
        data_start = 1024 + int(h['nsymbt'])
        if not hasattr(index, '__iter__'): index = xrange(index, count)
        # asarray also accepts plain lists and tuples, which have no astype
        else: index = numpy.asarray(index, dtype=int)
        total = file_size(f)
        expected = data_start + d_len * int(h['nz'][0]) * dtype.itemsize
        if total != expected:
            raise util.InvalidHeaderException(
                "file size != header: %d != %d -- %d" % (
                    total, expected, int(h['nsymbt'])))
        for i in index:
            # Always position on image i: skipping the seek for "the next
            # image" without tracking the previous index read the wrong
            # image when iteration started at index 1.
            pos = data_start + int(i) * d_len * dtype.itemsize
            try:
                f.seek(pos)
            except:
                _logger.error("%s -- %s" %
                              (str(pos), str(pos.__class__.__name__)))
                raise
            out = util.fromfile(f, dtype=dtype, count=d_len)

            out = reshape_data(out, h, index, count)
            # Data was stored in the opposite byte order to the native header layout
            if header_image_dtype.newbyteorder()[0] == h.dtype[0]:
                out = out.byteswap()
            yield out
    finally:
        util.close(filename, f)
Example #23
0
def read_image(filename, index=None, header=None):
    ''' Read an image from the specified file in the SPIDER format
    
    :Parameters:
    
    filename : str or file object
               Filename or open stream for a file
    index : int, optional
            Index of image to get, if None, first image (Default: None)
    header : dict, optional
             Dictionary to hold header values
             
    :Returns:
        
    out : array
          Array with image information from the file
    
    :Raises:
    
    IOError
        If `index` is not below the number of images
    ValueError
        If the header is inconsistent or the file size does not match it
    '''

    f = util.uopen(filename, 'rb')
    h = None
    try:
        if index is None: index = 0
        h = read_spider_header(f)
        dtype = numpy.dtype(spi2numpy[float(h['iform'])])
        tmp = read_header(h)
        if header is not None: header.update(tmp)

        h_len = int(h['labbyt'])
        d_len = int(h['nx']) * int(h['ny']) * int(h['nz'])
        i_len = d_len * 4

        count = count_images(h)

        if index >= count:
            raise IOError("Index exceeds number of images in stack: %d < %d" % (
                index, count))

        if count > 1 and int(h['istack']) == 0:
            raise ValueError("Improperly formatted SPIDER header - not stack but contains mutliple images")
        # In a stack, skip the stack header, the preceding header+image pairs
        # and the header of the requested image
        offset = h_len * 2 + index * (h_len + i_len) if int(
            h['istack']) > 0 else h_len
        actual = file_size(f)
        if count > 1 or h['istack'] == 2:
            expected = h_len + count * (h_len + i_len)
            if actual != expected:
                raise ValueError("file size != header: %d != %d - count: %d -- nx:%d,ny:%d,nz:%d" % (
                    actual, expected, count,
                    int(h['nx']), int(h['ny']), int(h['nz'])))
        else:
            expected = h_len + count * i_len
            if actual != expected:
                # Read the header at the stack position of the image for the diagnostic
                f.seek(h_len + index * (h_len + i_len))
                h2 = read_spider_header(f)
                # The trailing %d was missing, so formatting ten values
                # raised TypeError instead of this ValueError
                raise ValueError("file size != header: %d != %d - %d + %d * %d -- %d,%d == %d,%d -- count: %d" % (
                    actual, expected, h_len, count, i_len,
                    int(h['istack']), int(h['imgnum']), int(
                        h2['istack']), int(h2['imgnum']), int(h['maxim'])))
        try:
            f.seek(offset)
        except:
            _logger.error("Offset: %s" % str(offset))
            raise
        out = numpy.fromfile(f, dtype=dtype, count=d_len)
        # Data was stored in the opposite byte order to the native header layout
        if header_dtype.newbyteorder()[0] == h.dtype[0]: out = out.byteswap()
        if int(h['nz']) > 1:
            out = out.reshape(int(h['nz']), int(h['ny']), int(h['nx']))
        elif int(h['ny']) > 1:
            try:
                out = out.reshape(int(h['ny']), int(h['nx']))
            except:
                _logger.error("%d != %d*%d = %d" %
                              (out.ravel().shape[0], int(h['nx']), int(
                                  h['ny']), int(h['nx']) * int(h['ny'])))
                raise
    finally:
        util.close(filename, f)
    return out
Example #24
0
def open_file(fn):
    ''' Open `fn` for reading: through gzip wrapped in `ureader` if its extension is '.gz', with `uopen` otherwise '''
    extension = splitext(fn)[1]
    if extension == '.gz':
        return ureader(gzip.open(fn))
    return uopen(fn)
Example #25
0
def sentence(sent_id, files=512, chunk=4096):
    ''' Return the line holding sentence number `sent_id` (1-based)
    
    The file number is (sent_id - 1) // chunk, the directory number is the
    file number // files, and the line is looked up by its position within
    the file. The path is relative to the current directory.
    '''
    file_no, line_no = divmod(sent_id - 1, chunk)
    dir_no = file_no // files
    fname = path.join('%.2x' % dir_no, '%.4x.ss' % file_no)
    with uopen(fname) as f:
        # Raises StopIteration if the file has fewer lines than required
        return next(islice(f, line_no, line_no + 1))
Example #26
0
def main(indexname, verb, noun):
    ''' Print, numbered from 1, the sentence of every match `find` yields for (verb, noun) in the index file '''
    with uopen(indexname, 'r') as stream:
        matches = find((verb, noun), stream)
        for number, (_, text) in enumerate(matches, 1):
            print(number, text)
Example #27
0
    return (l.decode('utf8').rstrip().split(sep)
            for l in f) if decode else (l.rstrip().split(sep) for l in f)


from os import path
from itertools import islice


def sentence(sent_id, files=512, chunk=4096):
    ''' Return the line holding sentence number `sent_id` (1-based)
    
    The file number is (sent_id - 1) // chunk, the directory number is the
    file number // files, and the line is looked up by its position within
    the file. The path is relative to the current directory.
    '''
    file_no, line_no = divmod(sent_id - 1, chunk)
    dir_no = file_no // files
    fname = path.join('%.2x' % dir_no, '%.4x.ss' % file_no)
    with uopen(fname) as f:
        # Raises StopIteration if the file has fewer lines than required
        return next(islice(f, line_no, line_no + 1))


if __name__ == '__main__':
    # Without -m the arguments are passed straight to main(); with -m the
    # flag is expected first, followed by the index file and the two
    # relation files.
    args = sys.argv[1:]

    multiple = '-m' in args
    if not multiple:
        main(*args)
    else:
        # Exactly three values follow the flag; the former slice [1:5] could
        # yield four and break the three-name unpacking.
        index, vs, vo = args[1:4]
        r = dict(vo=u'1-компл', vs=u'предик')
        outs = uwriter(sys.stdout)
        with uopen(vs) as vss, uopen(vo) as vos, open(index) as indexs:
            relations = read(split(vss), r['vs']) | read(split(vos), r['vo'])
            findm(split(indexs, decode=True, sep=u'\t'), relations, outs)
Example #28
0
def read_image(filename, index=None, header=None):
    ''' Read an image from the specified file in the SPIDER format
    
    :Parameters:
    
    filename : str or file object
               Filename or open stream for a file
    index : int, optional
            Index of image to get, if None, first image (Default: None)
    header : dict, optional
             Dictionary to hold header values
             
    :Returns:
        
    out : array
          Array with image information from the file
    
    :Raises:
    
    IOError
        If `index` is not below the number of images
    ValueError
        If the header is inconsistent or the file size does not match it
    '''
    
    f = util.uopen(filename, 'rb')
    h = None
    try:
        if index is None: index = 0
        h = read_spider_header(f)
        dtype = numpy.dtype(spi2numpy[float(h['iform'])])
        tmp=read_header(h)
        if header is not None: header.update(tmp)
        
        h_len = int(h['labbyt'])
        d_len = int(h['nx']) * int(h['ny']) * int(h['nz'])
        i_len = d_len * 4
        
        count = count_images(h)
        
        if index >= count: raise IOError("Index exceeds number of images in stack: %d < %d"%(index, count))
        
        if count > 1 and int(h['istack']) == 0: raise ValueError("Improperly formatted SPIDER header - not stack but contains mutliple images")
        # In a stack, skip the stack header, the preceding header+image pairs
        # and the header of the requested image
        offset = h_len*2 + index * (h_len+i_len) if int(h['istack']) > 0 else h_len
        actual = file_size(f)
        if count > 1 or h['istack'] == 2:
            expected = h_len + count * (h_len+i_len)
            if actual != expected: 
                raise ValueError("file size != header: %d != %d - count: %d -- nx:%d,ny:%d,nz:%d"%(actual, expected, count, int(h['nx']), int(h['ny']), int(h['nz'])))
        else:
            expected = h_len + count * i_len
            if actual != expected: 
                # Read the header at the stack position of the image for the diagnostic
                f.seek(h_len + index * (h_len+i_len))
                h2 = read_spider_header(f)
                # The trailing %d was missing, so formatting ten values
                # raised TypeError instead of this ValueError
                raise ValueError("file size != header: %d != %d - %d + %d * %d -- %d,%d == %d,%d -- count: %d"%(actual, expected, h_len, count, i_len, int(h['istack']), int(h['imgnum']), int(h2['istack']), int(h2['imgnum']), int(h['maxim'])))
        try:
            f.seek(offset)
        except:
            _logger.error("Offset: %s"%str(offset))
            raise
        out = numpy.fromfile(f, dtype=dtype, count=d_len)
        # Data was stored in the opposite byte order to the native header layout
        if header_dtype.newbyteorder()[0]==h.dtype[0]: out = out.byteswap()
        if int(h['nz']) > 1:   out = out.reshape(int(h['nz']), int(h['ny']), int(h['nx']))
        elif int(h['ny']) > 1: 
            try:
                out = out.reshape(int(h['ny']), int(h['nx']))
            except:
                _logger.error("%d != %d*%d = %d"%(out.ravel().shape[0], int(h['nx']), int(h['ny']), int(h['nx'])*int(h['ny'])))
                raise
    finally:
        util.close(filename, f)
    return out
Example #29
0
def iter_images(filename, index=None, header=None):
    ''' Iterate over the images stored in a SPIDER file
    
    A file whose header has `istack` equal to 0 is treated as a single image
    and yields exactly one array. Any other file is treated as a stack and
    one array is yielded per selected entry.
    
    :Parameters:
    
    filename : str or file object
               Filename or open stream for a file
    index : int or sequence of int, optional
            If an int, index of the first image; every image from there to
            the end of the stack is yielded. If a sequence (list or array),
            the images at exactly those indices are yielded in that order.
            If None, start with the first image (Default: None)
    header : dict, optional
             Dictionary updated in place with the values returned by
             `read_header`
    
    :Returns:
        
    out : array
          Generator yielding one array per image, shaped (nz, ny, nx) when
          nz > 1, (ny, nx) when ny > 1 and left flat otherwise
    
    :Raises:
    
    IOError
        If any requested index is not smaller than the image count
    ValueError
        If the header reports several images but is not a stack header, if
        the file size disagrees with the header, or if an index is negative
    '''
    
    f = util.uopen(filename, 'rb')
    if index is None: index = 0
    try:
        # Normalise an explicit selection first so that plain lists are
        # accepted as well as arrays (`numpy.int` no longer exists in NumPy).
        if hasattr(index, '__iter__'):
            index = numpy.asarray(index).astype(int)
        
        h = read_spider_header(f)
        dtype = numpy.dtype(spi2numpy[float(h['iform'])])
        tmp = read_header(h)
        if header is not None: header.update(tmp)
        
        h_len = int(h['labbyt'])
        nx, ny, nz = int(h['nx']), int(h['ny']), int(h['nz'])
        istack = int(h['istack'])
        d_len = nx * ny * nz
        i_len = d_len * 4
        count = count_images(h)
        
        if numpy.any(index >= count):
            raise IOError("Index exceeds number of images in stack: %s >= %d"%(str(index), count))
        # This has to run before the single-image shortcut below, otherwise
        # it can never trigger.
        if count > 1 and istack == 0:
            raise ValueError("Improperly formatted SPIDER header - not stack but contains mutliple images")
        
        # A stack has one overall header followed by (header + data) per
        # image; anything else is one header followed by one data block.
        size = (h_len + count * (h_len+i_len)) if istack > 0 else (h_len + i_len)
        if file_size(f) != size:
            raise ValueError("file size != header: %d != %d - %d -- %d,%d,%d"%(file_size(f), size, count, nx, ny, nz))
        
        try:
            f.seek(h_len)
        except Exception:
            _logger.error("Offset: %s"%str(h_len))
            raise
        
        # Same test the original applied per image: swap the data when the
        # header was parsed with the byte-swapped header dtype.
        swap = header_dtype.newbyteorder()[0] == h.dtype[0]
        
        if istack == 0: # This file contains a single image!
            out = numpy.fromfile(f, dtype=dtype, count=d_len)
            if swap: out = out.byteswap()
            if nz > 1:   out = out.reshape(nz, ny, nx)
            elif ny > 1: out = out.reshape(ny, nx)
            yield out
            return
        
        if not hasattr(index, '__iter__'): index = range(index, count)
        
        last = -1
        for i in index:
            i = int(i)
            if i < 0: raise ValueError("Cannot have a negative index")
            if i != (last+1):
                # Random access: skip the overall header, the preceding
                # images and this image's own header.
                offset = h_len*2 + i * (h_len+i_len)
                try:
                    f.seek(offset)
                except Exception:
                    _logger.error("Offset: %s"%str(offset))
                    _logger.error("i: %s"%str(i))
                    raise
            else:
                # Sequential access: the stream sits on this image's header,
                # so only that header has to be skipped.
                f.seek(h_len, 1)
            last = i
            out = numpy.fromfile(f, dtype=dtype, count=d_len)
            if swap: out = out.byteswap()
            if nz > 1:
                try:
                    out = out.reshape(nz, ny, nx)
                except Exception:
                    _logger.error("%d, %d, %d == %d == %d", nz, ny, nx, numpy.prod((nz, ny, nx)), out.shape[0])
                    raise
            elif ny > 1:
                try:
                    out = out.reshape(ny, nx)
                except Exception:
                    _logger.error("(%d < %d) -- %d, %d == %d == %d", i, count, ny, nx, numpy.prod((ny, nx)), out.shape[0])
                    raise
            yield out
    finally:
        util.close(filename, f)
Example #30
0
def read_image(filename,
               index=None,
               header=None,
               cache=None,
               no_strict_mrc=False,
               force_volume=False):
    ''' Read an image from the specified file in the MRC format
    
    :Parameters:
    
        filename : str or file object
                   Filename or open stream for a file
        index : int, optional
                Index of image to get, if None, first image (Default: None)
        header : dict, optional
                 Output dictionary to place header values
        cache : object, optional
                Not used by this function
        no_strict_mrc : bool
                        Perform strict MRC header checking (recommended) - Only
                        EPU MRC files and Yifan's frame alignment require this
                        to be off.
        force_volume : bool
                       Force the image to be read as a volume
    
    :Returns:
            
        out : array
              Array with image information from the file
    
    :Raises:
    
        IOError
            If `index` is not smaller than the number of images
        util.InvalidHeaderException
            If the file size does not match the size implied by the header
    '''

    idx = 0 if index is None else index
    f = util.uopen(filename, 'rb')
    try:
        h = read_mrc_header(f, no_strict_mrc)
        tmp = read_header(h, force_volume=force_volume)
        if header is not None: header.update(tmp)
        count = count_images(h)
        if idx >= count:
            raise IOError("Index exceeds number of images in stack: %d >= %d" %
                          (idx, count))

        # Convert the header fields to Python ints once. The header values
        # are numpy scalars; if they are fixed-width, multiplying them
        # directly could wrap around for large volumes or stacks.
        nx = int(h['nx'][0])
        ny = int(h['ny'][0])
        nz = int(h['nz'][0])
        nsymbt = int(h['nsymbt'])

        # With no explicit index the whole volume is read when forced, or
        # when the image count equals nx; otherwise a single 2D section.
        if index is None and (count == nx or force_volume):
            d_len = nx * ny * nz
        else:
            d_len = nx * ny

        dtype = numpy.dtype(mrc2numpy[h['mode'][0]])
        data_start = 1024 + nsymbt  # fixed header plus extended header
        offset = data_start + idx * d_len * dtype.itemsize
        total = file_size(f)
        expected = data_start + nx * ny * nz * dtype.itemsize
        if total != expected:
            raise util.InvalidHeaderException(
                "file size != header: %d != %d -- %s, %d" %
                (total, expected, str(idx), nsymbt))
        f.seek(int(offset))
        out = util.fromfile(f, dtype=dtype, count=d_len)
        out = reshape_data(out, h, index, count, force_volume)
        # Swap the data when the header was parsed with the byte-swapped
        # header dtype.
        if header_image_dtype.newbyteorder()[0] == h.dtype[0]:
            out = out.byteswap()
    finally:
        util.close(filename, f)
    return out
Example #31
0
def write_image(filename, img, index=None, header=None, inplace=False):
    ''' Write an image array to a file in the MRC format
    
    :Parameters:
    
    filename : str
               Name of the output file
    img : array
          Image array
    index : int, optional
            Index to write image in the stack
    header : dict, optional
             Dictionary of header values. If it is not already a format
             header (it has no `dtype` or `is_format_header` rejects it), a
             new header is built from the defaults, these values and `img`
    inplace : bool
              Write new image to stack without removing the stack
    
    :Raises:
    
    TypeError
        If the dtype of `img` cannot be mapped to an MRC mode
    '''

    if header is None and hasattr(img, 'header'): header = img.header
    try:
        img = img.astype(mrc2numpy[numpy2mrc[img.dtype.type]])
    except Exception:
        raise TypeError("Unsupported type for MRC writing: %s" % str(
            img.dtype))

    # Keep the existing file when appending past the first image or when
    # writing in place; otherwise start a fresh file.
    mode = 'rb+' if index is not None and (
        index > 0 or inplace and index > -1) else 'wb+'
    f = util.uopen(filename, mode)
    try:
        if header is None or not hasattr(
                header, 'dtype') or not is_format_header(header):
            h = numpy.zeros(1, header_image_dtype)
            util.update_header(h, mrc_defaults, ara2mrc)
            pix = header.get('apix', 1.0) if header is not None else 1.0
            header = util.update_header(h, header, ara2mrc, 'mrc')
            # The transposed shape is (nx, ny, nz) for an array laid out as
            # (nz, ny, nx), so every axis length is taken from it.
            header['nx'] = img.T.shape[0]
            header['ny'] = img.T.shape[1] if img.ndim > 1 else 1
            if header['nz'] == 0:
                header['nz'] = img.T.shape[2] if img.ndim > 2 else 1
            header['mode'] = numpy2mrc[img.dtype.type]
            header['mx'] = header['nx']
            header['my'] = header['ny']
            header['mz'] = header['nz']
            header['xlen'] = header['nx'] * pix
            header['ylen'] = header['ny'] * pix
            header['zlen'] = header['nz'] * pix
            header['alpha'] = 90
            header['beta'] = 90
            header['gamma'] = 90
            header['mapc'] = 1
            header['mapr'] = 2
            header['maps'] = 3
            header['amin'] = numpy.min(img)
            header['amax'] = numpy.max(img)
            header['amean'] = numpy.mean(img)

            header['map'] = 'MAP'
            header['byteorder'] = byteorderint2[sys.byteorder]
            header['nlabels'] = 1
            header['label0'] = 'Created by Arachnid'

            if img.ndim == 3:
                # Floor division keeps these integral regardless of the
                # interpreter's `/` semantics.
                header['nxstart'] = header['nx'] // -2
                header['nystart'] = header['ny'] // -2
                header['nzstart'] = header['nz'] // -2
            if index is not None:
                # Writing into a stack: the section count becomes the
                # position of this image plus one.
                stack_count = index + 1
                header['nz'] = stack_count
                header['mz'] = stack_count
                header['zlen'] = stack_count
        else:
            # A ready-made format header was supplied; use it for the
            # extended-header size below as well.
            h = header

        if inplace:
            f.seek(
                int(1024 + int(h['nsymbt']) +
                    index * img.size * img.dtype.itemsize))
        elif f != filename:
            f.seek(0)
            header.tofile(f)
            if index is not None and index > 0:
                f.seek(
                    int(1024 + int(h['nsymbt']) +
                        index * img.size * img.dtype.itemsize))
        img.tofile(f)
    finally:
        util.close(filename, f)
Example #32
0
 def openf(fn):
     """Return a reader for *fn*; the name '-' selects standard input.

     Standard input is wrapped with `ureader`; any other name is passed to
     `uopen`.
     """
     if fn == '-':
         return ureader(sys.stdin)
     return uopen(fn)
Example #33
0
"""A replacement for ~katia/JuneSystem/June.sh
@author [email protected]
"""
from __future__ import print_function

from util import uopen, Environment

# Named path fragments; the relation files refer to {BNC}.
# NOTE(review): presumably Environment substitutes these {NAME} placeholders
# when called - confirm against util.Environment.
expand = Environment(BASE='/n/shokuji/dc/katia',
                     BNC='/u/metanet/corpolexica/EN/bnc-relations',
                     ADJ='{BNC}/AdverbialModifierForAdjRels.txt-uniqed-sorted',
                     DOBJ='{BNC}/DirectObjRels.txt-uniqed-sorted',
                     SUBJ='{BNC}/SubjectRels.txt-uniqed-sorted',
                     IOBJ='{BNC}/IndirectObjRels.txt-underscore-uniqed-sorted')

_SCOREF = expand('{BASE}/MRC_Conc_All')
# Each line must split into exactly two whitespace-separated fields, used as
# key and value; both stay strings.
# NOTE(review): the handle returned by uopen is never closed explicitly.
score = dict(line.rstrip().split() for line in uopen(_SCOREF))


def concreteness(target):
    """Return the entry stored for *target* in the module-level `score` table.

    Raises KeyError when *target* has no entry.
    """
    rating = score[target]
    return rating


def main(relation, target, source):
    """Placeholder entry point; accepts its arguments and does nothing."""
    return None
Example #34
0
def iter_images(filename, index=None, header=None):
    ''' Iterate over the images stored in a SPIDER file
    
    A file whose header has `istack` equal to 0 is treated as a single image
    and yields exactly one array. Any other file is treated as a stack and
    one array is yielded per selected entry.
    
    :Parameters:
    
    filename : str or file object
               Filename or open stream for a file
    index : int or sequence of int, optional
            If an int, index of the first image; every image from there to
            the end of the stack is yielded. If a sequence (list or array),
            the images at exactly those indices are yielded in that order.
            If None, start with the first image (Default: None)
    header : dict, optional
             Dictionary updated in place with the values returned by
             `read_header`
    
    :Returns:
        
    out : array
          Generator yielding one array per image, shaped (nz, ny, nx) when
          nz > 1, (ny, nx) when ny > 1 and left flat otherwise
    
    :Raises:
    
    IOError
        If any requested index is not smaller than the image count
    ValueError
        If the header reports several images but is not a stack header, if
        the file size disagrees with the header, or if an index is negative
    '''

    f = util.uopen(filename, 'rb')
    if index is None: index = 0
    try:
        # Normalise an explicit selection first so that plain lists are
        # accepted as well as arrays (`numpy.int` no longer exists in NumPy).
        if hasattr(index, '__iter__'):
            index = numpy.asarray(index).astype(int)

        h = read_spider_header(f)
        dtype = numpy.dtype(spi2numpy[float(h['iform'])])
        tmp = read_header(h)
        if header is not None: header.update(tmp)

        h_len = int(h['labbyt'])
        nx, ny, nz = int(h['nx']), int(h['ny']), int(h['nz'])
        istack = int(h['istack'])
        d_len = nx * ny * nz
        i_len = d_len * 4
        count = count_images(h)

        if numpy.any(index >= count):
            raise IOError(
                "Index exceeds number of images in stack: %s >= %d" %
                (str(index), count))
        # This has to run before the single-image shortcut below, otherwise
        # it can never trigger.
        if count > 1 and istack == 0:
            raise ValueError(
                "Improperly formatted SPIDER header - not stack but contains mutliple images"
            )

        # A stack has one overall header followed by (header + data) per
        # image; anything else is one header followed by one data block.
        size = (h_len + count *
                (h_len + i_len)) if istack > 0 else (h_len + i_len)
        if file_size(f) != size:
            raise ValueError(
                "file size != header: %d != %d - %d -- %d,%d,%d" %
                (file_size(f), size, count, nx, ny, nz))

        try:
            f.seek(h_len)
        except Exception:
            _logger.error("Offset: %s" % str(h_len))
            raise

        # Same test the original applied per image: swap the data when the
        # header was parsed with the byte-swapped header dtype.
        swap = header_dtype.newbyteorder()[0] == h.dtype[0]

        if istack == 0:  # This file contains a single image!
            out = numpy.fromfile(f, dtype=dtype, count=d_len)
            if swap:
                out = out.byteswap()
            if nz > 1:
                out = out.reshape(nz, ny, nx)
            elif ny > 1:
                out = out.reshape(ny, nx)
            yield out
            return

        if not hasattr(index, '__iter__'): index = range(index, count)

        last = -1
        for i in index:
            i = int(i)
            if i < 0: raise ValueError("Cannot have a negative index")
            if i != (last + 1):
                # Random access: skip the overall header, the preceding
                # images and this image's own header.
                offset = h_len * 2 + i * (h_len + i_len)
                try:
                    f.seek(offset)
                except Exception:
                    _logger.error("Offset: %s" % str(offset))
                    _logger.error("i: %s" % str(i))
                    raise
            else:
                # Sequential access: the stream sits on this image's header,
                # so only that header has to be skipped.
                f.seek(h_len, 1)
            last = i
            out = numpy.fromfile(f, dtype=dtype, count=d_len)
            if swap:
                out = out.byteswap()
            if nz > 1:
                try:
                    out = out.reshape(nz, ny, nx)
                except Exception:
                    _logger.error("%d, %d, %d == %d == %d", nz, ny, nx,
                                  numpy.prod((nz, ny, nx)), out.shape[0])
                    raise
            elif ny > 1:
                try:
                    out = out.reshape(ny, nx)
                except Exception:
                    _logger.error("(%d < %d) -- %d, %d == %d == %d", i, count,
                                  ny, nx, numpy.prod((ny, nx)), out.shape[0])
                    raise
            yield out
    finally:
        util.close(filename, f)