Code Example #1
File: wind.py Project: dribnet/wind
  def step(self, dbg=False):

    self.i += 1
    n = self.n
    stp = self.stp

    self.angle += (1-2*random())*self.angle_stp
    angle = self.angle + (1-2*random(n))*self.angle_local_stp

    xy = self.xy[:n,:]
    p = self.p[:n,:]

    tree = kdt(xy)

    new_p = xy + column_stack([cos(angle),sin(angle)]) * stp

    if len(new_p)>0:
      ind = tree.query_ball_point(new_p, stp)
      mask = [i for i,v in enumerate(ind) if not v]

      if mask:

        new_num = len(mask)
        self.p[n:n+new_num] = reshape(mask,(-1,1))
        self.xy[n:n+new_num,:] = new_p[mask,:]

        inside = n+(logical_and(self.xy[n:n+new_num,:]<1.0,
                                self.xy[n:n+new_num,:]>0.0).sum(axis=1)==2)\
          .nonzero()[0]
        li = len(inside)
        self.xy[n:n+li,:] = self.xy[inside,:]
        self.p[n:n+li] = self.p[inside]
        self.n = n + li

    return True
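
The excerpt above (repeated in Example #3) uses kdt, random, column_stack, cos, sin, reshape and logical_and without showing its imports. A plausible import block, assuming the project follows the cKDTree-as-kdt alias seen throughout these examples, would be:

# Assumed imports for the wind.py excerpt above; a sketch, not the project's actual header.
from numpy import column_stack, cos, sin, reshape, logical_and
from numpy.random import random
from scipy.spatial import cKDTree as kdt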
Code Example #2
    def __make_sources(self, xx=0.5, yy=0.5, rad=None, domain='rect'):

        from scipy.spatial import cKDTree as kdt
        from scipy.spatial import Delaunay as triag
        from iutils.random import darts
        from iutils.random import darts_rect

        if rad is None:
            rad = self.init_rad

        if domain == 'circ':
            sources = darts(self.init_num, xx, yy, self.init_rad,
                            self.source_dst)
        elif domain == 'rect':
            sources = darts_rect(self.init_num, xx, yy, 2 * rad, 2 * rad,
                                 self.source_dst)
        else:
            raise ValueError('domain must be "rect" or "circ".')
        tree = kdt(sources)
        self.sources = sources
        self.tree = tree
        self.tri = triag(self.sources,
                         incremental=False,
                         qhull_options='QJ Qc')
        self.num_sources = len(self.sources)

        return len(sources)
Code Example #3
File: wind.py Project: dribnet/wind
    def step(self, dbg=False):

        self.i += 1
        n = self.n
        stp = self.stp

        self.angle += (1 - 2 * random()) * self.angle_stp
        angle = self.angle + (1 - 2 * random(n)) * self.angle_local_stp

        xy = self.xy[:n, :]
        p = self.p[:n, :]

        tree = kdt(xy)

        new_p = xy + column_stack([cos(angle), sin(angle)]) * stp

        if len(new_p) > 0:
            ind = tree.query_ball_point(new_p, stp)
            mask = [i for i, v in enumerate(ind) if not v]

            if mask:

                new_num = len(mask)
                self.p[n:n + new_num] = reshape(mask, (-1, 1))
                self.xy[n:n + new_num, :] = new_p[mask, :]

                inside = n+(logical_and(self.xy[n:n+new_num,:]<1.0,
                                        self.xy[n:n+new_num,:]>0.0).sum(axis=1)==2)\
                  .nonzero()[0]
                li = len(inside)
                self.xy[n:n + li, :] = self.xy[inside, :]
                self.p[n:n + li] = self.p[inside]
                self.n = n + li

        return True
Code Example #4
File: environment.py Project: sazamore/RoboSkeeter
    def _calc_kdtree(self, selection = 'interpolated'):
        if self.condition in 'controlControlCONTROL':
            return None

        data = self._select_data(selection)

        # materialize the zip so cKDTree gets a sequence (required under Python 3)
        zdata = list(zip(data.x, data.y, data.z))
        return kdt(zdata)
Code Example #5
File: spatial.py Project: connectomics-2015/bable
    def __init__(self, nodes, coords=None):
        # spatial.cKDTree is reported to be 200-1000 times faster;
        # however, query_ball_point is only included in very recent scipy
        # packages, not yet shipped with Ubuntu 12.10
        # (and a manual scipy installation can be messy)

        self.lookup = list(nodes)

        if coords is None:
            self.coords = np.array([x.coord_scaled for x in nodes])

            self.tree = kdt(self.coords)
        else:
            self.coords = coords
            # the ordering of coords and nodes must be the same!!!
            self.tree = kdt(self.coords)

        return
Code Example #6
def build_pos_index(paths):
    num = len(paths)
    xs = zeros((2 * num, 2), 'float')
    x_path = zeros(2 * num, 'int')

    for i, (start, stop) in enumerate(paths):
        xs[i, :] = start
        xs[num + i, :] = stop
        x_path[i] = i
        x_path[num + i] = i

    tree = kdt(xs)
    unsorted = set(range(2 * num))
    return tree, xs, x_path, unsorted
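
A hedged usage sketch for build_pos_index (the paths below are invented): the returned tree can be queried for the endpoint nearest a position, and x_path maps that endpoint back to its path index.

# Hypothetical usage; assumes the same numpy/scipy imports the function itself relies on.
from numpy import array, zeros
from scipy.spatial import cKDTree as kdt

paths = [(array([0.0, 0.0]), array([1.0, 1.0])),
         (array([0.5, 0.2]), array([0.3, 0.9]))]
tree, xs, x_path, unsorted = build_pos_index(paths)
dist, i = tree.query(array([0.4, 0.4]))  # nearest start or stop point
print(x_path[i], xs[i])                  # owning path index and endpoint coordinates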
Code Example #7
def matchPoints(patch1, patch2, coord1, coord2, thresh=0.4):
  """ Matches a pair of keypoints from two images,
  based on feature descriptors (image patches)
  """
  tree = kdt(patch2)
  dists, idx = tree.query(patch1, k=2)

  ratios = dists[:,0]/dists[:,1]
  patch2Idx = idx[ratios < thresh][:,0]
  patch1Idx = np.arange(len(patch1))[ratios < thresh]

  matched1 = coord1[patch1Idx]
  matched2 = coord2[patch2Idx]
  return matched1, matched2
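
A usage sketch for matchPoints with synthetic data (all arrays here are invented): patch1 and patch2 are descriptor matrices with one row per keypoint, coord1 and coord2 the matching keypoint coordinates.

# Hypothetical call; with random descriptors few pairs pass the ratio test, which is expected.
import numpy as np
from scipy.spatial import cKDTree as kdt  # alias assumed by the snippet above

patch1 = np.random.rand(50, 64)   # 50 descriptors of length 64
patch2 = np.random.rand(80, 64)
coord1 = np.random.rand(50, 2)
coord2 = np.random.rand(80, 2)
matched1, matched2 = matchPoints(patch1, patch2, coord1, coord2, thresh=0.8)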
Code Example #8
    def _append_tmp_sources(self):

        from scipy.spatial import cKDTree as kdt
        from scipy.spatial import Delaunay as triag

        sources = row_stack([self.sources] + self.tmp_sources)
        tree = kdt(sources)
        self.sources = sources
        self.tree = tree
        self.tmp_sources = []
        self.tri = triag(self.sources,
                         incremental=False,
                         qhull_options='QJ Qc')
        self.num_sources = len(self.sources)

        return len(sources)
Code Example #9
File: curve.py Project: inconvergent/curve
  def _append_tmp_sources(self):

    from scipy.spatial import cKDTree as kdt
    from scipy.spatial import Delaunay as triag

    sources = row_stack([self.sources]+self.tmp_sources)
    tree = kdt(sources)
    self.sources = sources
    self.tree = tree
    self.tmp_sources = []
    self.tri = triag(
      self.sources,
      incremental=False,
      qhull_options='QJ Qc'
    )
    self.num_sources = len(self.sources)

    return len(sources)
Code Example #10
File: utils.py Project: lkluft/windtunnel
def equ_dist_ts(arrival_time, eq_dist_array, data):
    """ Create a time series with constant time steps. The nearest point of the 
   original time series is used for the corresponding time of the equi-distant
   time series.
   @parameter: arrival_time, type = np.array 
   @parameter: eq_dist_array, type = np.array
   @parameter: data, type = np.array"""

    mask = ~np.isnan(data)
    data = data[mask]
    valid = np.arange(data.size)

    tt = kdt(list(zip(arrival_time[valid],
                      np.zeros(arrival_time[valid].size))))
    eq_tt = list(zip(eq_dist_array, np.zeros(eq_dist_array.size)))
    eq_tt = tt.query(eq_tt)[1]
    eq_data = data[valid][eq_tt]
    return eq_data
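
A small, hedged example of calling equ_dist_ts (values invented): arrival_time holds the irregular sample times, and the function returns one data value per entry of eq_dist_array.

# Hypothetical input; assumes np and the cKDTree-as-kdt alias are imported as in the module.
import numpy as np
from scipy.spatial import cKDTree as kdt

arrival_time = np.array([0.00, 0.13, 0.31, 0.58, 0.94])
data = np.array([1.0, np.nan, 3.0, 4.0, 5.0])
eq_dist_array = np.arange(0.0, 1.0, 0.25)
print(equ_dist_ts(arrival_time, eq_dist_array, data))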
Code Example #11
File: random.py Project: echarrod/leonardo-dao-vinci
def darts_rect(n, xx, yy, w=1, h=1, dst=0):

    ## remove new nodes that are too close to other
    ## new nodes

    visited = set()
    dartsxy = random_points_in_rectangle(n, xx, yy, w, h)
    tree = kdt(dartsxy)
    near = tree.query_ball_point(dartsxy, dst)
    jj = []
    for j, n in enumerate(near):

        if len(visited.intersection(n)) < 1:
            jj.append(j)
            visited.add(j)

    res = dartsxy[jj, :]
    return res
Code Example #12
def spatial_sort_dots_2d(vertices, init_rad=0.01):

  from numpy import array
  from numpy import arange
  from numpy.linalg import norm
  from scipy.spatial import cKDTree as kdt

  num = len(vertices)

  res = []

  unsorted = set(arange(num).astype('int'))

  tree = kdt(vertices)

  count = 0
  pos = array([0,0],'float')

  while count<num:

    rad = init_rad
    while True:

      near = tree.query_ball_point(pos, rad)
      cands = list(set(near).intersection(unsorted))
      if not cands:
        rad *= 2.0
        continue

      dst = norm(pos - vertices[cands,:], axis=1)
      cp = dst.argmin()
      uns = cands[cp]
      break

    path = vertices[uns]

    res.append(path)
    pos = vertices[uns, :]
    unsorted.remove(uns)

    count += 1

  return res
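
A hedged call of the sorter above: vertices must be an (n, 2) float array, and the result is the same points reordered into a greedy short tour starting from the origin.

# Hypothetical usage with random points.
from numpy.random import random
points = random((200, 2))
ordered = spatial_sort_dots_2d(points, init_rad=0.05)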
Code Example #13
File: random.py Project: inconvergent/ddd-utils
def darts_rect(n, xx, yy, w=1, h=1, dst=0):


  ## remove new nodes that are too close to other
  ## new nodes

  visited = set()
  dartsxy = random_points_in_rectangle(n, xx, yy, w, h)
  tree = kdt(dartsxy)
  near = tree.query_ball_point(dartsxy, dst)
  jj = []
  for j,n in enumerate(near):

    if len(visited.intersection(n))<1:
      jj.append(j)
      visited.add(j)

  res = dartsxy[jj,:]
  return res
Code Example #14
File: curve.py Project: inconvergent/curve
  def __make_sources(self, xx=0.5, yy=0.5, rad=None, domain='rect'):

    from scipy.spatial import cKDTree as kdt
    from scipy.spatial import Delaunay as triag
    from dddUtils.random import darts
    from dddUtils.random import darts_rect

    if rad is None:
      rad = self.init_rad

    if domain=='circ':
      sources = darts(
        self.init_num,
        xx,
        yy,
        self.init_rad,
        self.source_dst
      )
    elif domain=='rect':
      sources = darts_rect(
        self.init_num,
        xx,
        yy,
        2*rad,
        2*rad,
        self.source_dst
      )
    else:
      raise ValueError('domain must be "rect" or "circ".')
    tree = kdt(sources)
    self.sources = sources
    self.tree = tree
    self.tri = triag(
      self.sources,
      incremental=False,
      qhull_options='QJ Qc'
    )
    self.num_sources = len(self.sources)

    return len(sources)
Code Example #15
File: random.py Project: echarrod/leonardo-dao-vinci
def darts(n, xx, yy, rr, dst):
    """
  get at most n random, uniformly distributed, points in a circle.
  centered at (xx,yy), with radius rr. points are no closer to each other
  than dst.
  """

    ## remove new nodes that are too close to other
    ## new nodes

    visited = set()
    dartsxy = random_points_in_circle(n, xx, yy, rr)
    tree = kdt(dartsxy)
    near = tree.query_ball_point(dartsxy, dst)
    jj = []
    for j, n in enumerate(near):

        if len(visited.intersection(n)) < 1:
            jj.append(j)
            visited.add(j)

    res = dartsxy[jj, :]
    return res
Code Example #16
File: random.py Project: inconvergent/ddd-utils
def darts(n, xx, yy, rr, dst):
  """
  get at most n random, uniformly distributed, points in a circle.
  centered at (xx,yy), with radius rr. points are no closer to each other
  than dst.
  """

  ## remove new nodes that are too close to other
  ## new nodes

  visited = set()
  dartsxy = random_points_in_circle(n, xx, yy, rr)
  tree = kdt(dartsxy)
  near = tree.query_ball_point(dartsxy, dst)
  jj = []
  for j,n in enumerate(near):

    if len(visited.intersection(n))<1:
      jj.append(j)
      visited.add(j)

  res = dartsxy[jj,:]
  return res
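
A hedged usage sketch (random_points_in_circle and kdt come from the same module and are assumed to be imported): request at most 200 darts in a circle of radius 0.4 around (0.5, 0.5), keeping only points at least 0.02 apart.

# Hypothetical call; the function returns at most n points.
pts = darts(200, 0.5, 0.5, 0.4, 0.02)
print(len(pts), 'points kept')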
Code Example #17
def main():
    parser = argparse.ArgumentParser(
        description="Evaluate the n matrix embedding experiment",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "--dictionaries",
        "-d",
        nargs='+',
        type=argparse.FileType('r'),
        default=[
            sys.stdin,
        ],
        help="vocabulary dictionaries of the form word vec with a header")
    parser.add_argument(
        "--infile",
        "-i",
        type=argparse.FileType('r'),
        default=sys.stdin,
        help=
        "evaluation instruction of the form word1 lang1 lang2 [word2]. If word2 is absent it is only predicted, not evaluated"
    )
    parser.add_argument("--modelfiles",
                        "-m",
                        nargs='+',
                        default=[],
                        help="all models input files")
    parser.add_argument(
        "--outfile",
        "-o",
        nargs='?',
        type=argparse.FileType('w'),
        default=sys.stdout,
        help=
        "results file of the form word1 lang1 lang2 word2 [pos wordlist], where the first three fields are identical to eval and the last field is the 1-best prediction. If truth is known, ordinal position of correct answer (-1 if not found) followed by the n-best list in order"
    )
    parser.add_argument(
        "--nbest",
        "-n",
        type=int,
        default=10,
        help="nbest neighbors generated for purposes of evaluation")
    parser.add_argument("--pickle",
                        "-p",
                        action='store_true',
                        default=False,
                        help="dictionaries are pickled with pickle_vocab")
    parser.add_argument("--hidewords",
                        action='store_true',
                        default=False,
                        help="don't actually print nbest words")

    try:
        args = parser.parse_args()
    except IOError as msg:
        parser.error(str(msg))

    infile = args.infile
    dictionaries = [pickle.load(d) for d in args.dictionaries
                    ] if args.pickle else [d for d in args.dictionaries]
    dicts_by_lang = dd(list)
    langdims = dict()
    outfile = args.outfile
    for d in dictionaries:
        if args.pickle:
            lang = d['lang']
            dims = int(d['dim'])
        else:
            info = d.readline().strip().split()
            dims = int(info[1])
            lang = info[2]
        if lang in langdims:
            if dims != langdims[lang]:
                raise ValueError("Multiple dimensions seen for %s: %d and %d" %
                                 (lang, dims, langdims[lang]))
        else:
            langdims[lang] = dims
        dicts_by_lang[lang].append(d)
    inmats = {}
    outmats = {}
    vocab = dd(lambda: dict())
    # for kdt lookup
    targets = dd(list)
    targetvoc = dd(list)
    models = [np.load(x) for x in args.modelfiles]
    for l in list(langdims.keys()):
        inmats[l] = [np.matrix(x['%s_in' % l]) for x in models]
        outmats[l] = [np.matrix(x['%s_out' % l]) for x in models]
        fdim = langdims[l]
        for dfile in dicts_by_lang[l]:
            if args.pickle:
                print("Unpickling for " + l)
                vocab[l].update(dfile['vocab'])
                targets[l].extend(dfile['targets'])
                targetvoc[l].extend(dfile['targetvoc'])
            else:
                print("processing " + dfile.name)
                try:
                    for ln, line in enumerate(dfile):
                        entry = line.strip().split(' ')
                        if len(entry) < fdim + 1:
                            sys.stderr.write(
                                "skipping line %d in %s because it only has %d fields; first field is %s\n"
                                % (ln, dfile.name, len(entry), entry[0]))
                            continue
                        word = ' '.join(entry[:-fdim])
                        vec = np.array(entry[-fdim:]).astype(float)
                        #          print "Adding "+l+" -> "+word
                        vocab[l][word] = vec
                        targets[l].append(vec)
                        targetvoc[l].append(word)
                except:
                    print(dfile.name)
                    print(line)
                    print(len(entry))
                    print(word)
                    print(ln)
                    raise
        # normalize for euclidean distance nearest neighbor => cosine with constant
        targets[l] = kdt(normalize(np.array(targets[l]), axis=1, norm='l2'))
    print("loaded vocabularies")

    for line in infile:
        inst = line.strip().split()
        inword = inst[0]
        inlang = inst[1]
        outlang = inst[2]
        outword = inst[3] if len(inst) > 3 else None
        if inword not in vocab[inlang]:
            sys.stderr.write("Warning: Couldn't find %s -> %s\n" %
                             (inlang, inword))
            continue
        report = inst[:4]
        invec = np.matrix(vocab[inlang][inword])
        for smat, tmat in zip(inmats[inlang], outmats[outlang]):
            xform = np.asarray(invec * smat * tmat)[0]
            neighbors = []
            cosines, cands = targets[outlang].query(xform, args.nbest)
            for cos, cand in zip(cosines, cands):
                neighbors.append((cos, targetvoc[outlang][cand]))
            nb_words = [x[1] for x in neighbors]
            xbest = str(cosine(xform, vocab[outlang][nb_words[0]]))
            if outword is not None:
                truth = vocab[outlang][outword]
                xtruth = str(cosine(xform, truth))
                truthbest = str(cosine(truth, vocab[outlang][nb_words[0]]))
                rank = nb_words.index(outword) if outword in nb_words else -1
                report.append(str(rank))
                report.extend([xtruth, xbest, truthbest])
            else:
                report.append(xbest)
            if not args.hidewords:
                report.extend(nb_words)
        outfile.write('\t'.join(report) + "\n")
Code Example #18
File: __tpc__.py Project: esiwgnahz/porespy
    def run(self, npts=100, r=20, dtransform=None):
        r"""
        Performs the 2-point correlation calculation.

        This method works by selecting a set of **query** points in the void
        space then finding all neighboring points within a specified distance
        of each **query** point that lie in the void space or the solid phase.
        The fraction of points that lie in the void space as a function of
        distance from the query point is returned.

        Parameters
        ----------
        npts : int
            The number of points against which the neighboring points should
            be queried.  The **query** points are randomly selected, so
            repeated calls to run will not necessarily generate identical
            results.  If the results differ too much then ``npts`` should be
            increased.

        r : scalar or vector
            Controls the radial distance from the query points that are
            considered.  If a scalar is received then a list of sizes between
            1 and ``r`` is generated with a spacing of 1 voxel, otherwise the
            given ``r`` values are used.  It is useful to provide ``r`` values
            to limit the number of points and speed-up the calculation.

        TODO: The methods in here could clearly benefit from proper use of
        itertools, nditer, and other numpy functions.  I can't quite figure
        how to convert meshgrid to vector form.

        """
        if sp.size(r) == 1:
            rmax = r
            sizes = sp.arange(1, rmax)
        else:
            sizes = r
            rmax = r[-1]
        # Extract size metrics from input image
        [Lx, Ly, Lz] = sp.shape(self.image)
        ind = sp.where(self.image == 1)
        temp = sp.random.randint(0, sp.shape(ind)[1], npts)
        i_query = (ind[0][temp], ind[1][temp], ind[2][temp])
        i_void = sp.where(self.image == 1)
        i_solid = sp.where(self.image == 0)

        # Reduce points to only those within rmax of query points
        if dtransform is None:
            imtemp = sp.ones((Lx, Ly, Lz), dtype=bool)
            imtemp[i_query] = False
            dtransform = spim.distance_transform_edt(imtemp)
        mask = dtransform <= rmax
        i_void = sp.where((self.image * mask) == 1)
        i_solid = sp.where(((~self.image) * mask) == 1)

        # Convert matrix into index notation for void and solid phases
        ind_void = sp.vstack(
            (i_void[0].flatten(), i_void[1].flatten(), i_void[2].flatten())).T
        ind_solid = sp.vstack((i_solid[0].flatten(), i_solid[1].flatten(),
                               i_solid[2].flatten())).T
        ind_query = sp.vstack((i_query[0].flatten(), i_query[1].flatten(),
                               i_query[2].flatten())).T

        # Generate kdtrees for void, solid and query points
        dtree_void = kdt(ind_void)
        dtree_solid = kdt(ind_solid)
        dtree_pts = kdt(ind_query)

        # Perform 2-point correlation calculation for range of radii
        print('Checking correlations vs increasing radii')
        print('0%|' + '-' * len(sizes) + '|100%')
        print('  |', end='')
        hits = []
        for r in sizes:
            print('.', end='')
            sys.stdout.flush()
            hits_void = dtree_pts.count_neighbors(other=dtree_void, r=r)
            hits_solid = dtree_pts.count_neighbors(other=dtree_solid, r=r)
            hits.append(hits_void / (hits_solid + hits_void))
        print('|')

        # Store results in namedtuple
        vals = namedtuple('TwoPointCorrelation', ('distance', 'probability'))
        vals.distance = sizes
        vals.probability = hits
        return vals
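
The heart of the method above is cKDTree.count_neighbors. A stripped-down sketch of the same two-point-correlation idea on a synthetic boolean image (everything below is invented for illustration and independent of the porespy class):

import numpy as np
from scipy.spatial import cKDTree as kdt

im = np.random.rand(40, 40, 40) > 0.5                # synthetic image: True = void, False = solid
void = np.argwhere(im)                               # void voxel coordinates
solid = np.argwhere(~im)
query = void[np.random.randint(0, len(void), 100)]   # random query points in the void phase

t_void, t_solid, t_query = kdt(void), kdt(solid), kdt(query)
for r in range(1, 10):
    hits_void = t_query.count_neighbors(t_void, r=r)
    hits_solid = t_query.count_neighbors(t_solid, r=r)
    print(r, hits_void / (hits_void + hits_solid))   # void fraction within radius r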
Code Example #19
def main():
    parser = argparse.ArgumentParser(
        description="Show l2norm of all pairwise languages in a trained model",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--dictionaries",
        "-d",
        nargs="+",
        type=argparse.FileType("r"),
        default=[sys.stdin],
        help="vocabulary dictionaries of the form word vec with a header",
    )
    parser.add_argument(
        "--infile",
        "-i",
        type=argparse.FileType("r"),
        default=sys.stdin,
        help="evaluation instruction of the form word1 lang1 word2 lang2 ... wordn langn.",
    )
    parser.add_argument("--modelfiles", "-m", nargs="+", default=[], help="all models input files")
    parser.add_argument(
        "--outfile",
        "-o",
        nargs="?",
        type=argparse.FileType("w"),
        default=sys.stdout,
        help="for each model, for each pairwise language, the l2norm",
    )
    parser.add_argument(
        "--pickle", "-p", action="store_true", default=False, help="dictionaries are pickled with pickle_vocab"
    )

    try:
        args = parser.parse_args()
    except IOError as msg:
        parser.error(str(msg))

    infile = args.infile
    dictionaries = [pickle.load(d) for d in args.dictionaries] if args.pickle else [d for d in args.dictionaries]
    dicts_by_lang = dd(list)
    langdims = dict()
    outfile = args.outfile
    for d in dictionaries:
        if args.pickle:
            lang = d["lang"]
            dims = int(d["dim"])
        else:
            info = d.readline().strip().split()
            dims = int(info[1])
            lang = info[2]
        if lang in langdims:
            if dims != langdims[lang]:
                raise ValueError("Multiple dimensions seen for %s: %d and %d" % (lang, dims, langdims[lang]))
        else:
            langdims[lang] = dims
        dicts_by_lang[lang].append(d)
    inmats = {}
    outmats = {}
    vocab = dd(lambda: dict())
    # for kdt lookup
    targets = dd(list)
    targetvoc = dd(list)
    models = [np.load(x) for x in args.modelfiles]
    for l in list(langdims.keys()):
        inmats[l] = [np.matrix(x["%s_in" % l]) for x in models]
        outmats[l] = [np.matrix(x["%s_out" % l]) for x in models]
        fdim = langdims[l]
        for dfile in dicts_by_lang[l]:
            if args.pickle:
                print("Unpickling for " + l)
                vocab[l].update(dfile["vocab"])
                targets[l].extend(dfile["targets"])
                targetvoc[l].extend(dfile["targetvoc"])
            else:
                print("processing " + dfile.name)
                try:
                    for ln, line in enumerate(dfile):
                        entry = line.strip().split(" ")
                        if len(entry) < fdim + 1:
                            sys.stderr.write(
                                "skipping line %d in %s because it only has %d fields; first field is %s\n"
                                % (ln, dfile.name, len(entry), entry[0])
                            )
                            continue
                        word = " ".join(entry[:-fdim])
                        vec = np.array(entry[-fdim:]).astype(float)
                        #          print "Adding "+l+" -> "+word
                        vocab[l][word] = vec
                        targets[l].append(vec)
                        targetvoc[l].append(word)
                except:
                    print(dfile.name)
                    print(line)
                    print(len(entry))
                    print(word)
                    print(ln)
                    raise
        # normalize for euclidean distance nearest neighbor => cosine with constant
        targets[l] = kdt(normalize(np.array(targets[l]), axis=1, norm="l2"))
    print("loaded vocabularies")

    data = dd(list)
    langmap = {}
    for line in infile:
        linedata = line.strip().split()
        for dset, (word, lang) in enumerate(zip(linedata[::2], linedata[1::2])):
            if word not in vocab[lang]:
                sys.stderr.write("Warning: Couldn't find %s -> %s\n" % (lang, word))
                continue
            if dset not in langmap:
                langmap[dset] = lang
            elif langmap[dset] != lang:
                sys.stderr.write("Language collision at %d: %s vs %s\n" % (dset, lang, landmap[dset]))
                sys.exit(1)
            data[dset].append(vocab[lang][word])
    for lang in langmap:
        data[lang] = np.matrix(data[lang])
    langs = len(langmap.keys())
    for m, mfile in enumerate(args.modelfiles):
        for d1 in range(langs):
            l1 = langmap[d1]
            d1xform = data[d1] * inmats[l1][m]
            for d2 in range(d1 + 1, langs):
                l2 = langmap[d2]
                if l2 in inmats:
                    # i-i calculation
                    d2xform = data[d2] * inmats[l2][m]
                    delta = d1xform - d2xform
                    delnorm = LA.norm(delta, ord=2)
                    l2n2 = delnorm * delnorm
                    outfile.write("%s\tii\t%s\t%s\t%f\n" % (mfile, l1, l2, l2n2))
                if l2 in outmats:
                    xform = d1xform * outmats[l2][m]
                    delta = xform - data[d2]
                    delnorm = LA.norm(delta, ord=2)
                    l2n2 = delnorm * delnorm
                    outfile.write("%s\tio\t%s\t%s\t%f\n" % (mfile, l1, l2, l2n2))
Code Example #20
                            continue
                        word = ' '.join(entry[:-fdim])
                        vec = np.array(entry[-fdim:]).astype(float)
                        #          print "Adding "+l+" -> "+word
                        vocab[l][word] = vec
                        targets[l].append(vec)
                        targetvoc[l].append(word)
                except:
                    print dfile.name
                    print line
                    print len(entry)
                    print word
                    print ln
                    raise
        # normalize for euclidean distance nearest neighbor => cosine with constant
        targets[l] = kdt(normalize(np.array(targets[l]), axis=1, norm='l2'))
    print "loaded vocabularies"

    for line in infile:
        inst = line.strip().split()
        inword = inst[0]
        inlang = inst[1]
        outlang = inst[2]
        outword = inst[3] if len(inst) > 3 else None
        if inword not in vocab[inlang]:
            #sys.stderr.write("Warning: Couldn't find %s -> %s\n" % (inlang, inword))
            continue
        report = inst[:4]
        invec = np.matrix(vocab[inlang][inword])
        for smat, tmat in zip(inmats[inlang], outmats[outlang]):
            xform = np.asarray(invec * smat * tmat)[0]
Code Example #21
def main():
  parser = argparse.ArgumentParser(description="Evaluate the n matrix embedding experiment",
                                   formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument("--dictionaries", "-d", nargs='+', type=argparse.FileType('r'), default=[sys.stdin,], help="vocabulary dictionaries of the form word vec with a header")
  parser.add_argument("--infile", "-i", type=argparse.FileType('r'), default=sys.stdin, help="evaluation instruction of the form word1 lang1 lang2 [word2]. If word2 is absent it is only predicted, not evaluated")
  parser.add_argument("--modelfiles", "-m", nargs='+', default=[], help="all models input files")
  parser.add_argument("--outfile", "-o", nargs='?', type=argparse.FileType('w'), default=sys.stdout, help="results file of the form word1 lang1 lang2 word2 [pos wordlist], where the first three fields are identical to eval and the last field is the 1-best prediction. If truth is known, ordinal position of correct answer (-1 if not found) followed by the n-best list in order")
  parser.add_argument("--nbest", "-n", type=int, default=10, help="nbest neighbors generated for purposes of evaluation")
  parser.add_argument("--pickle", "-p", action='store_true', default=False, help="dictionaries are pickled with pickle_vocab")
  parser.add_argument("--hidewords", action='store_true', default=False, help="don't actually print nbest words")


  try:
    args = parser.parse_args()
  except IOError as msg:
    parser.error(str(msg))


  infile = args.infile
  dictionaries = [pickle.load(d) for d in args.dictionaries] if args.pickle else [d for d in args.dictionaries]
  dicts_by_lang = dd(list)
  langdims = dict()
  outfile = args.outfile
  for d in dictionaries:
    if args.pickle:
      lang = d['lang']
      dims = int(d['dim'])
    else:
      info = d.readline().strip().split()
      dims = int(info[1])
      lang = info[2]
    if lang in langdims:
      if dims != langdims[lang]:
        raise ValueError("Multiple dimensions seen for %s: %d and %d" % (lang, dims, langdims[lang]))
    else:
      langdims[lang]=dims
    dicts_by_lang[lang].append(d)
  inmats = {}
  outmats = {}
  vocab = dd(lambda: dict())
  # for kdt lookup
  targets = dd(list)
  targetvoc = dd(list)
  models = [ np.load(x) for x in args.modelfiles ]
  for l in list(langdims.keys()):
    inmats[l] = [ np.matrix(x['%s_in' % l]) for x in models ]
    outmats[l] = [ np.matrix(x['%s_out' % l]) for x in models ]
    fdim = langdims[l]
    for dfile in dicts_by_lang[l]:
      if args.pickle:
        print("Unpickling for "+l)
        vocab[l].update(dfile['vocab'])
        targets[l].extend(dfile['targets'])
        targetvoc[l].extend(dfile['targetvoc'])
      else:
        print("processing "+dfile.name)
        try:
          for ln, line in enumerate(dfile):
            entry = line.strip().split(' ')
            if len(entry) < fdim+1:
              sys.stderr.write("skipping line %d in %s because it only has %d fields; first field is %s\n" % (ln, dfile.name, len(entry), entry[0]))
              continue
            word = ' '.join(entry[:-fdim])
            vec = np.array(entry[-fdim:]).astype(float)
  #          print "Adding "+l+" -> "+word
            vocab[l][word]=vec
            targets[l].append(vec)
            targetvoc[l].append(word)
        except:
          print(dfile.name)
          print(line)
          print(len(entry))
          print(word)
          print(ln)
          raise
    # normalize for euclidean distance nearest neighbor => cosine with constant
    targets[l] = kdt(normalize(np.array(targets[l]), axis=1, norm='l2'))
  print("loaded vocabularies")

  for line in infile:
    inst = line.strip().split()
    inword = inst[0]
    inlang = inst[1]
    outlang = inst[2]
    outword = inst[3] if len(inst) > 3 else None
    if inword not in vocab[inlang]:
      sys.stderr.write("Warning: Couldn't find %s -> %s\n" % (inlang, inword))
      continue
    report = inst[:4]
    invec = np.matrix(vocab[inlang][inword])
    for smat, tmat in zip(inmats[inlang], outmats[outlang]):
      xform = np.asarray(invec*smat*tmat)[0]
      neighbors = []
      cosines, cands = targets[outlang].query(xform, args.nbest)
      for cos, cand in zip(cosines, cands):
        neighbors.append((cos, targetvoc[outlang][cand]))
      nb_words = [x[1] for x in neighbors]
      xbest=str(cosine(xform, vocab[outlang][nb_words[0]]))
      if outword is not None:
        truth=vocab[outlang][outword]
        xtruth=str(cosine(xform, truth))
        truthbest=str(cosine(truth, vocab[outlang][nb_words[0]]))
        rank = nb_words.index(outword) if outword in nb_words else -1
        report.append(str(rank))
        report.extend([xtruth, xbest, truthbest])
      else:
        report.append(xbest)
      if not args.hidewords:
        report.extend(nb_words)
    outfile.write('\t'.join(report)+"\n")
Code Example #22
File: eval_1mat_p3.py Project: afcarl/deeplangtools
def main():
  parser = argparse.ArgumentParser(description="Evaluate the 1 matrix no interlingus embedding experiment",
                                   formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument("--sourcedictionary", "-S", type=argparse.FileType('r'),  help="source vocabulary dictionary of the form lang word vec; headed by row col")
  parser.add_argument("--targetdictionary", "-T", type=argparse.FileType('r'),  help="target vocabulary dictionary of the form lang word vec; headed by row col")
  parser.add_argument("--infile", "-i", type=argparse.FileType('r'), default=sys.stdin, help="evaluation instruction of the form word1 lang1 lang2 [word2]. If word2 is absent it is only predicted, not evaluated")
  parser.add_argument("--modelfile", "-m", help="all models input file")
  parser.add_argument("--outfile", "-o", nargs='?', type=argparse.FileType('w'), default=sys.stdout, help="results file of the form word1 lang1 lang2 word2 [pos wordlist], where the first three fields are identical to eval and the last field is the 1-best prediction. If truth is known, ordinal position of correct answer (-1 if not found) followed by the n-best list in order")
  parser.add_argument("--nbest", "-n", type=int, default=10, help="nbest neighbors generated for purposes of evaluation")
  parser.add_argument("--pickle", "-p", action='store_true', default=False, help="dictionaries are pickled with pickle_vocab")

  try:
    args = parser.parse_args()
  except IOError as msg:
    parser.error(str(msg))


  infile =  args.infile
  outfile = args.outfile
  sourcedictionary = pickle.load(args.sourcedictionary) if args.pickle else args.sourcedictionary
  targetdictionary = pickle.load(args.targetdictionary) if args.pickle else args.targetdictionary

  dims = {}
  if args.pickle:
    sourcelang = sourcedictionary['lang']
    dims[sourcelang]=int(sourcedictionary['dim'])
    targetlang = targetdictionary['lang']
    dims[targetlang]=int(targetdictionary['dim'])
  else:
    sourceinfo = sourcedictionary.readline().strip().split()
    targetinfo = targetdictionary.readline().strip().split()
    sourcelang=sourceinfo[2]
    targetlang=targetinfo[2]
    dims[sourcelang] = int(sourceinfo[1])
    dims[targetlang] = int(targetinfo[1])
  dicts_by_lang = {}
  dicts_by_lang[sourcelang]=sourcedictionary
  dicts_by_lang[targetlang]=targetdictionary
  sourcedim = dims[sourcelang]
  targetdim = dims[targetlang]
  print(sourcedim,targetdim)


  mat = np.matrix(np.load(args.modelfile)['arr_0'])
  print(mat.shape)

  vocab = dd(lambda: dict())
  if args.pickle:
    print("Unpickling")
    targets = dicts_by_lang[targetlang]['targets']
    targetvoc = dicts_by_lang[targetlang]['targetvoc']
    for lang in (sourcelang, targetlang):
      vocab[lang] = dicts_by_lang[lang]['vocab']
  else:
    print("Loading vocab from text files")
    targets = []
    targetvoc = []

    # load transformation matrices
    # TODO: would be cool if this could exist on-disk in some binary format so only the instructions need be passed in
    # Kludgy: store source and target in different structures

    for lang in (sourcelang, targetlang):
      istarget = lang == targetlang
      fdim = dims[lang]
      dfile = dicts_by_lang[lang]
      try:
        for ln, line in enumerate(dfile):
          entry = line.strip().split(' ')
          if len(entry) < fdim+1:
            sys.stderr.write("skipping line %d in %s because it only has %d fields; first field is %s\n" % (ln, dfile.name, len(entry), entry[0]))
            continue
          word = ' '.join(entry[:-fdim])
          vec = np.array(entry[-fdim:]).astype(float)
          vocab[lang][word]=vec
          if istarget:
            targets.append(vec)
            targetvoc.append(word)

      except:
        print(dfile.name)
        print(line)
        print(len(entry))
        print(word)
        print(ln)
        raise
  targets = kdt(normalize(np.array(targets), axis=1, norm='l2'))
  print("loaded vocabularies")

  for line in infile:
    inst = line.strip().split()
    inword = inst[0]
    inlang = inst[1]
    outlang = inst[2]
    outword = inst[3] if len(inst) > 3 else None
    if inword not in vocab[inlang]:
      sys.stderr.write("Warning: Couldn't find %s -> %s\n" % (inlang, inword))
      continue
    invec = np.matrix(vocab[inlang][inword])
    xform = np.asarray(invec*mat)[0]
    neighbors = []
    cosines, cands = targets.query(xform, args.nbest)
    for cos, cand in zip(cosines, cands):
      neighbors.append((cos, targetvoc[cand]))

    report = inst[:3]
    nb_words = [x[1] for x in neighbors]
    xbest=str(cosine(xform, vocab[outlang][nb_words[0]]))
    if outword is not None and outword in vocab[outlang]:
      report.append(inst[3])
      #cosines: xform to truth, xform to 1best, truth to 1best
      truth=vocab[outlang][outword]
      xtruth=str(cosine(xform, truth))
      truthbest=str(cosine(truth, vocab[outlang][nb_words[0]]))
      rank = nb_words.index(outword) if outword in nb_words else -1
      report.append(str(rank))
      report.extend([xtruth, xbest, truthbest])
    else:
      report.append(xbest)
    report.extend(nb_words)
    outfile.write('\t'.join(report)+"\n")
Code Example #23
def spatial_sort(paths, init_rad=0.01):

    from numpy import array
    from numpy import zeros
    from numpy.linalg import norm
    from scipy.spatial import cKDTree as kdt

    num = len(paths)

    res = []

    unsorted = set(range(2 * num))

    xs = zeros((2 * num, 2), 'float')
    x_path = zeros(2 * num, 'int')

    for i, path in enumerate(paths):
        xs[i, :] = path[0, :]
        xs[num + i, :] = path[-1, :]

        x_path[i] = i
        x_path[num + i] = i

    tree = kdt(xs)

    count = 0
    pos = array([0, 0], 'float')

    order = []

    while count < num:

        rad = init_rad
        while True:

            near = tree.query_ball_point(pos, rad)
            cands = list(set(near).intersection(unsorted))
            if not cands:
                rad *= 2.0
                continue

            dst = norm(pos - xs[cands, :], axis=1)
            cp = dst.argmin()
            uns = cands[cp]
            break

        path_ind = x_path[uns]
        path = paths[path_ind]

        if uns >= num:
            res.append(path[::-1])
            pos = paths[path_ind][0, :]
            unsorted.remove(uns)
            unsorted.remove(uns - num)

        else:
            res.append(path)
            pos = paths[path_ind][-1, :]
            unsorted.remove(uns)
            unsorted.remove(uns + num)

        order.append(path_ind)

        count += 1

    return res, order
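
A hedged usage sketch for spatial_sort (paths invented): each path is an (m, 2) array; the function returns the paths reordered, some reversed, so that consecutive path ends lie close together, plus the visiting order of the original indices.

# Hypothetical input paths.
from numpy import array
paths = [array([[0.0, 0.0], [0.2, 0.1]]),
         array([[0.9, 0.9], [0.5, 0.5]]),
         array([[0.3, 0.8], [0.4, 0.2]])]
sorted_paths, order = spatial_sort(paths, init_rad=0.05)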
Code Example #24
  sourceinfo = map(int, sourcedictionary.readline().strip().split())
  sourcedim = sourceinfo[1]

  smat = np.matrix(np.load(args.modelfile)['s'])
  tmat = np.matrix(np.load(args.modelfile)['t'])
  print smat.shape
  print tmat.shape
  
  # load transformation matrices
  # TODO: would be cool if this could exist on-disk in some binary format so only the instructions need be passed in
  # Kludgy: store source and target in different structures
  vocab = dd(lambda: dict())
  # for kdt lookup
  pretargets, targetvoc = cPickle.load(args.targetdictionary)
  targets = kdt(pretargets)
  invtargetvoc = dict()
  for key, word in enumerate(targetvoc):
    invtargetvoc[word]=key
  print len(targetvoc)
  
  try:
    for ln, line in enumerate(sourcedictionary):
      entry = line.strip().split(' ')
      if len(entry) < sourcedim+2:
        sys.stderr.write("skipping line %d in %s because it only has %d fields; first field is %s\n" % (ln, sourcedictionary.name, len(entry), entry[0]))
        continue
      lang = entry[0]
      word = ' '.join(entry[1:-sourcedim])
      vec = np.array(entry[-sourcedim:]).astype(float)
      vocab[lang][word]=vec
Code Example #25
def main():
    parser = argparse.ArgumentParser(
        description="Show l2norm of all pairwise languages in a trained model",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "--dictionaries",
        "-d",
        nargs='+',
        type=argparse.FileType('r'),
        default=[
            sys.stdin,
        ],
        help="vocabulary dictionaries of the form word vec with a header")
    parser.add_argument(
        "--infile",
        "-i",
        type=argparse.FileType('r'),
        default=sys.stdin,
        help=
        "evaluation instruction of the form word1 lang1 word2 lang2 ... wordn langn."
    )
    parser.add_argument("--modelfiles",
                        "-m",
                        nargs='+',
                        default=[],
                        help="all models input files")
    parser.add_argument(
        "--outfile",
        "-o",
        nargs='?',
        type=argparse.FileType('w'),
        default=sys.stdout,
        help="for each model, for each pairwise language, the l2norm")
    parser.add_argument("--pickle",
                        "-p",
                        action='store_true',
                        default=False,
                        help="dictionaries are pickled with pickle_vocab")

    try:
        args = parser.parse_args()
    except IOError as msg:
        parser.error(str(msg))

    infile = args.infile
    dictionaries = [pickle.load(d) for d in args.dictionaries
                    ] if args.pickle else [d for d in args.dictionaries]
    dicts_by_lang = dd(list)
    langdims = dict()
    outfile = args.outfile
    for d in dictionaries:
        if args.pickle:
            lang = d['lang']
            dims = int(d['dim'])
        else:
            info = d.readline().strip().split()
            dims = int(info[1])
            lang = info[2]
        if lang in langdims:
            if dims != langdims[lang]:
                raise ValueError("Multiple dimensions seen for %s: %d and %d" %
                                 (lang, dims, langdims[lang]))
        else:
            langdims[lang] = dims
        dicts_by_lang[lang].append(d)
    inmats = {}
    outmats = {}
    vocab = dd(lambda: dict())
    # for kdt lookup
    targets = dd(list)
    targetvoc = dd(list)
    models = [np.load(x) for x in args.modelfiles]
    for l in list(langdims.keys()):
        inmats[l] = [np.matrix(x['%s_in' % l]) for x in models]
        outmats[l] = [np.matrix(x['%s_out' % l]) for x in models]
        fdim = langdims[l]
        for dfile in dicts_by_lang[l]:
            if args.pickle:
                print("Unpickling for " + l)
                vocab[l].update(dfile['vocab'])
                targets[l].extend(dfile['targets'])
                targetvoc[l].extend(dfile['targetvoc'])
            else:
                print("processing " + dfile.name)
                try:
                    for ln, line in enumerate(dfile):
                        entry = line.strip().split(' ')
                        if len(entry) < fdim + 1:
                            sys.stderr.write(
                                "skipping line %d in %s because it only has %d fields; first field is %s\n"
                                % (ln, dfile.name, len(entry), entry[0]))
                            continue
                        word = ' '.join(entry[:-fdim])
                        vec = np.array(entry[-fdim:]).astype(float)
                        #          print "Adding "+l+" -> "+word
                        vocab[l][word] = vec
                        targets[l].append(vec)
                        targetvoc[l].append(word)
                except:
                    print(dfile.name)
                    print(line)
                    print(len(entry))
                    print(word)
                    print(ln)
                    raise
        # normalize for euclidean distance nearest neighbor => cosine with constant
        targets[l] = kdt(normalize(np.array(targets[l]), axis=1, norm='l2'))
    print("loaded vocabularies")

    data = dd(list)
    langmap = {}
    for line in infile:
        linedata = line.strip().split()
        for dset, (word, lang) in enumerate(zip(linedata[::2],
                                                linedata[1::2])):
            if word not in vocab[lang]:
                sys.stderr.write("Warning: Couldn't find %s -> %s\n" %
                                 (lang, word))
                continue
            if dset not in langmap:
                langmap[dset] = lang
            elif langmap[dset] != lang:
                sys.stderr.write("Language collision at %d: %s vs %s\n" %
                                 (dset, lang, langmap[dset]))
                sys.exit(1)
            data[dset].append(vocab[lang][word])
    for lang in langmap:
        data[lang] = np.matrix(data[lang])
    langs = len(langmap.keys())
    for m, mfile in enumerate(args.modelfiles):
        for d1 in range(langs):
            l1 = langmap[d1]
            d1xform = data[d1] * inmats[l1][m]
            for d2 in range(d1 + 1, langs):
                l2 = langmap[d2]
                if l2 in inmats:
                    # i-i calculation
                    d2xform = data[d2] * inmats[l2][m]
                    delta = d1xform - d2xform
                    delnorm = LA.norm(delta, ord=2)
                    l2n2 = delnorm * delnorm
                    outfile.write("%s\tii\t%s\t%s\t%f\n" %
                                  (mfile, l1, l2, l2n2))
                if l2 in outmats:
                    xform = d1xform * outmats[l2][m]
                    delta = xform - data[d2]
                    delnorm = LA.norm(delta, ord=2)
                    l2n2 = delnorm * delnorm
                    outfile.write("%s\tio\t%s\t%s\t%f\n" %
                                  (mfile, l1, l2, l2n2))
Code Example #26
File: ddd.py Project: inconvergent/iutils
def spatial_sort_2d(paths, init_rad=0.01):

  from numpy import array
  from numpy import zeros
  from numpy.linalg import norm
  from scipy.spatial import cKDTree as kdt

  num = len(paths)

  res = []

  unsorted = set(range(2*num))

  xs = zeros((2*num,2), 'float')
  x_path = zeros(2*num, 'int')

  for i, path in enumerate(paths):
    xs[i,:] = path[0,:]
    xs[num+i,:] = path[-1,:]

    x_path[i] = i
    x_path[num+i] = i

  tree = kdt(xs)

  count = 0
  pos = array([0,0],'float')

  while count<num:

    rad = init_rad
    while True:

      near = tree.query_ball_point(pos, rad)
      cands = list(set(near).intersection(unsorted))
      if not cands:
        rad *= 2.0
        continue

      dst = norm(pos - xs[cands,:], axis=1)
      cp = dst.argmin()
      uns = cands[cp]
      break

    path_ind = x_path[uns]
    path = paths[path_ind]

    if uns>=num:
      res.append(path[::-1])
      pos = paths[path_ind][0,:]
      unsorted.remove(uns)
      unsorted.remove(uns-num)

    else:
      res.append(path)
      pos = paths[path_ind][-1,:]
      unsorted.remove(uns)
      unsorted.remove(uns+num)

    count += 1

  return res
Code Example #27
File: spatial.py Project: connectomics-2015/bable
    def __setstate__(self, state):
        self.lookup, self.coords = state
        self.tree = kdt(self.coords)
Code Example #28
File: __tpc__.py Project: zhangwise/porespy
    def run(self, npts=100, r=20, dtransform=None):
        r"""
        Performs the 2-point correlation calculation.

        This method works by selecting a set of **query** points in the void
        space then finding all neighboring points within a specified distance
        of each **query** point that lie in the void space or the solid phase.
        The fraction of points that lie in the void space as a function of
        distance from the query point is returned.

        Parameters
        ----------
        npts : int
            The number of points against which the neighboring points should
            be queried.  The **query** points are randomly selected, so
            repeated calls to run will not necessarily generate identical
            results.  If the results differ too much then ``npts`` should be
            increased.

        r : scalar or vector
            Controls the radial distance from the query points that are
            considered.  If a scalar is received then a list of sizes between
            1 and ``r`` is generated with a spacing of 1 voxel, otherwise the
            given ``r`` values are used.  It is useful to provide ``r`` values
            to limit the number of points and speed-up the calculation.

        TODO: The methods in here could clearly benefit from proper use of
        itertools, nditer, and other numpy functions.  I can't quite figure
        how to convert meshgrid to vector form.

        """
        if sp.size(r) == 1:
            rmax = r
            sizes = sp.arange(1, rmax)
        else:
            sizes = r
            rmax = r[-1]
        # Extract size metrics from input image
        [Lx, Ly, Lz] = sp.shape(self.image)
        ind = sp.where(self.image == 1)
        temp = sp.random.randint(0, sp.shape(ind)[1], npts)
        i_query = (ind[0][temp], ind[1][temp], ind[2][temp])
        i_void = sp.where(self.image == 1)
        i_solid = sp.where(self.image == 0)

        # Reduce points to only those within rmax of query points
        if dtransform is None:
            imtemp = sp.ones((Lx, Ly, Lz), dtype=bool)
            imtemp[i_query] = False
            dtransform = spim.distance_transform_edt(imtemp)
        mask = dtransform <= rmax
        i_void = sp.where((self.image*mask) == 1)
        i_solid = sp.where(((~self.image)*mask) == 1)

        # Convert matrix into index notation for void and solid phases
        ind_void = sp.vstack((i_void[0].flatten(),
                              i_void[1].flatten(),
                              i_void[2].flatten())).T
        ind_solid = sp.vstack((i_solid[0].flatten(),
                               i_solid[1].flatten(),
                               i_solid[2].flatten())).T
        ind_query = sp.vstack((i_query[0].flatten(),
                               i_query[1].flatten(),
                               i_query[2].flatten())).T

        # Generate kdtrees for void, solid and query points
        dtree_void = kdt(ind_void)
        dtree_solid = kdt(ind_solid)
        dtree_pts = kdt(ind_query)

        # Perform 2-point correlation calculation for range of radii
        print('Checking correlations vs increasing radii')
        print('0%|'+'-'*len(sizes)+'|100%')
        print('  |', end='')
        hits = []
        for r in sizes:
            print('.', end='')
            sys.stdout.flush()
            hits_void = dtree_pts.count_neighbors(other=dtree_void, r=r)
            hits_solid = dtree_pts.count_neighbors(other=dtree_solid, r=r)
            hits.append(hits_void/(hits_solid + hits_void))
        print('|')

        # Store results in namedtuple
        vals = namedtuple('TwoPointCorrelation', ('distance', 'probability'))
        vals.distance = sizes
        vals.probability = hits
        return vals
Code Example #29
    sourceinfo = map(int, sourcedictionary.readline().strip().split())
    sourcedim = sourceinfo[1]

    smat = np.matrix(np.load(args.modelfile)['s'])
    tmat = np.matrix(np.load(args.modelfile)['t'])
    print smat.shape
    print tmat.shape

    # load transformation matrices
    # TODO: would be cool if this could exist on-disk in some binary format so only the instructions need be passed in
    # Kludgy: store source and target in different structures
    vocab = dd(lambda: dict())
    # for kdt lookup
    pretargets, targetvoc = cPickle.load(args.targetdictionary)
    targets = kdt(pretargets)
    invtargetvoc = dict()
    for key, word in enumerate(targetvoc):
        invtargetvoc[word] = key
    print len(targetvoc)

    try:
        for ln, line in enumerate(sourcedictionary):
            entry = line.strip().split(' ')
            if len(entry) < sourcedim + 2:
                sys.stderr.write(
                    "skipping line %d in %s because it only has %d fields; first field is %s\n"
                    % (ln, sourcedictionary.name, len(entry), entry[0]))
                continue
            lang = entry[0]
            word = ' '.join(entry[1:-sourcedim])
Code Example #30
File: eval_1mat.py Project: isi-nlp/deeplangtools
            continue
          word = ' '.join(entry[:-fdim])
          vec = np.array(entry[-fdim:]).astype(float)
          vocab[lang][word]=vec
          if istarget:
            targets.append(vec)
            targetvoc.append(word)

      except:
        print dfile.name
        print line
        print len(entry)
        print word
        print ln
        raise
  targets = kdt(normalize(np.array(targets), axis=1, norm='l2'))
  print "loaded vocabularies"

  for line in infile:
    inst = line.strip().split()
    inword = inst[0]
    inlang = inst[1]
    outlang = inst[2]
    outword = inst[3] if len(inst) > 3 else None
    if inword not in vocab[inlang]:
#      sys.stderr.write("Warning: Couldn't find %s -> %s\n" % (inlang, inword))
      continue
    invec = np.matrix(vocab[inlang][inword])
    xform = np.asarray(invec*mat)[0]
    neighbors = []
    cosines, cands = targets.query(xform, args.nbest)
Code Example #31
    def __init__(self, mesh, leaf_size=10):
        self.__mesh = mesh
        self.__tree = kdt(mesh.vertices, leaf_size)