Example #1
def utf_uc3(this, utc, it):
    retn = ""
    this = this.encode()
    for i in range(len(this)):
        if i % 2 != 0: retn += scale(10, 15, this[i] + utc - i) + ","
        else:          retn += scale(10, 12, this[i] - it + i) + ","
    return retn[0:-1]
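Throughout this listing, calls like scale(10, 15, n) read as "convert the base-10 value n to a string in base 15". The helper itself is never shown, so the following is only a minimal sketch under that assumption:

def to_base(n, base, digits='0123456789abcdefghijklmnopqrstuvwxyz'):
    # Hypothetical stand-in for scale(10, base, n): base-10 integer -> base-`base` string.
    if n == 0:
        return digits[0]
    sign, n = ('-', -n) if n < 0 else ('', n)
    out = ''
    while n:
        n, r = divmod(n, base)
        out = digits[r] + out
    return sign + out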
Example #2
def get_next_input():
    '''
    Reads an entire event file and its reference. When the file reaches the end, it picks
    another file at random and repeats the reading process. Returns the next event input
    of the current file, in a sliding window.
    '''
    if (basecallTraining.current_file == "" or (basecallTraining.current_file_length - basecallTraining.current_index_input) <= utils.batch_size):
        basecallTraining.resetValues()
        train_files = basecallTraining.getFiles(basecallTraining.train_dir)
        num = random.randrange(0, len(train_files))
        target_file = train_files[num]
        with open(basecallTraining.train_dir + target_file) as f:
            lines = f.readlines()
        basecallTraining.current_lines = lines
        basecallTraining.current_file = target_file
        basecallTraining.current_file_length = len(lines)
        fast5file = basecallTraining.current_lines[0].split()[4]
        basecallTraining.current_scale, basecallTraining.current_scale_sd, basecallTraining.current_shift = scale.get_scale_and_shift(fast5file, 1, "template")
        input = []
        for x in range(1, len(basecallTraining.current_lines)):
            input.append(basecallTraining.current_lines[x].split())
            mean = input[-1][0]
            stdv = input[-1][1]
            mean, stdv = scale.scale(mean, stdv, basecallTraining.current_scale, basecallTraining.current_scale_sd, basecallTraining.current_shift)
            input[-1][0] = mean
            input[-1][1] = stdv
        input.pop(0)
        basecallTraining.current_input = input
        basecallTraining.current_bases_per_event_ratio = float(len(basecallTraining.current_lines[0].split()[3])) / float(len(input))
    ret = np.asarray(basecallTraining.current_input)[basecallTraining.current_index_input:(basecallTraining.current_index_input + utils.batch_size)]
    basecallTraining.current_index_input += 2
    return ret
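Note that current_index_input advances by only 2 per call while each returned batch spans utils.batch_size rows, so consecutive batches overlap. The same sliding-window idea as a standalone sketch (all names here are hypothetical):

import numpy as np

def sliding_batches(rows, batch_size, step=2):
    # Yield overlapping windows of batch_size rows, advancing by `step` each call.
    rows = np.asarray(rows)
    for start in range(0, len(rows) - batch_size + 1, step):
        yield rows[start:start + batch_size]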
Example #3
def weight_matrix(training_filename, get_scale=False, delim=','):
    """
    Returns the weight matrix built from the data in the file
    at the filename given as the first argument, scaled according to
    the values in the file.
    """
    training_scale = scale(training_filename, delim)
    with open(training_filename, 'r') as training_file:
        for line in training_file:
            x_i, y_i = parse_vectors(line, training_scale, delim)
            try:
                sum_xi += x_i * x_i.T
                sum_yi += x_i * y_i.T
            except NameError:
                sum_xi = x_i * x_i.T
                sum_yi = x_i * y_i.T
            except ValueError: # row has differing number of attributes
                if sum_xi.shape[0] < x_i.shape[0]:
                    # more attributes, use this as the standard
                    sum_xi = x_i * x_i.T
                    sum_yi = x_i * y_i.T
                else:
                    # fewer attributes, ignore this row
                    pass
    try:
        W = sum_xi.I * sum_yi # will raise exception if no inverse
    except linalg.LinAlgError:
        W = (sum_xi + 0.00001*identity(sum_xi.shape[0])).I * sum_yi
    if get_scale:
        return W, training_scale
    return W
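weight_matrix accumulates the normal equations (the sums of x_i * x_i.T and x_i * y_i.T) and inverts them, falling back to a small ridge term when the accumulated matrix is singular. A hedged usage sketch; the file name and vector shapes are assumptions:

# Hypothetical call; parse_vectors is assumed to return numpy column matrices.
W, train_scale = weight_matrix('train.csv', get_scale=True)
y_hat = W.T * x_new  # x_new: a column vector scaled with train_scale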
Example #4
def test_scale(filename, result_dir=None):
    input_img = Image.open(filename)

    cases = []
    cases.append([(192, 128), (96, 64), (48, 32), (24, 16), (12, 8)])
    cases.append([(300, 200), ])
    cases.append([(450, 300), ])
    cases.append([(500, 200), ])
    count = 1

    for case in cases:
        for size in case:
            print "Scaling Case %d" % (count, )
            result = scale(input_img, size)
            result_size = result.size
            comparison = "expected size %s, actual size %s" % (
                str(size), str(result_size))

            count += 1

            expect(
                result_size == size,
                "[PASS] Scaling: " + comparison,
                "[FAIL] Scaling: " + comparison)

            if result_dir:
                result_name = 'scale-%d-%d.png' % size
                result_path = os.path.join(result_dir, result_name)
                result.save(result_path)
                print(case_message('[Saved] ' + result_path))
Example #5
def encrypt(string, _set, extra):
    if _set.encode == "UTF": uc3 = utf_uc3
    else:                    uc3 = uni_uc3
    _time = str(int(time() * 1000))
    part1 = int(_time[10:13])
    part2 = string
    part3 = round(random() * 100)
    part4 = int(_time[0:5])
    part5 = round(random() * 10)
    if part5 == 0: part5 = 10
    part6 = int(_time[5:10])

    ectpart1 = scale(10, 15, str(part1)[::-1])
    ectpart2 = uc3(part2, part1, part4)
    ectpart3 = scale(10, 3, part3)
    ectpart4 = scale(10, 36, part4 + 15 - int(ectpart3))[::-1]
    ectpart5 = scale(10, 9, part5)
    ectpart6 = scale(10, 35, part6 - 15 * part5)[::-1]

    retn = "~" + ectpart1 + "!" + ectpart2 + "!"
    retn += ",".join((ectpart3, ectpart4, ectpart5, ectpart6))
    return retn
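encrypt relies on time() and random() being imported at module level and expects a settings object exposing an encode attribute; a hedged invocation sketch (the real settings type is not shown in this listing):

from time import time
from random import random
from types import SimpleNamespace

settings = SimpleNamespace(encode="UTF")  # hypothetical stand-in for _set
token = encrypt("hello", settings, None)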
Example #6
    def build(self, X, targetA, autoscale=False):

        nobj, nvar = np.shape(X)

        self.nobj = nobj
        self.nvar = nvar

        self.X = X

        X, mu = center(X)
        X, wg = scale(X, autoscale)

        self.mu = mu
        self.wg = wg
        self.autoscale = autoscale

        SSXac = 0.0

        for a in range(targetA):
            # extracts LV
            t, p = self.extractPC(X)

            self.t.append(t)
            self.p.append(p)

            # deflates X
            X, SSX, SSXex = self.deflatePC(X,t,p)

            SSXac += SSXex
            
            self.SSXex.append(SSXex)
            self.SSXac.append(SSXac)
            
            if a==0:
                self.SSX = SSX

        self.A = targetA
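A hedged usage sketch of this build; the class name and driver code are assumptions based on the method body:

# Hypothetical driver; X is a numpy matrix of shape (nobj, nvar).
model = pca()
model.build(X, targetA=2, autoscale=True)
print(model.SSXac)  # cumulative fraction of SSX explained after each component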
Example #7
File: pls.py Project: tljm/eTOXlab
    def validateLOO(self, A, gui=False):
        """ Validates A dimensions of an already built PLS model, using Leave-One-Out cross-validation

            Returns nothing. The results of the cv (SSY, SDEP and Q2) are stored internally
        """

        if self.X is None or self.Y is None:
            return

        X = self.X
        Y = self.Y

        nobj, nvarx = np.shape(X)

        SSY0 = 0.0
        for i in range(nobj):
            SSY0 += np.square(Y[i] - np.mean(Y))

        SSY = np.zeros(A, dtype=np.float64)
        YP = np.zeros((nobj, A + 1), dtype=np.float64)

        if gui: updateProgress(0.0)

        for i in range(nobj):

            # build reduced X and Y matrices removing i object
            Xr = np.delete(X, i, axis=0)
            Yr = np.delete(Y, i)

            Xr, muxr = center(Xr)
            Xr, wgxr = scale(Xr, self.autoscale)

            Yr, muyr = center(Yr)

            xp = np.copy(X[i, :])

            xp -= muxr
            xp *= wgxr

            # predicts y for the i object, using A LV
            yp = self.getLOO(Xr, Yr, xp, A)
            yp += muyr

            # updates SSY with the object i errors
            YP[i, 0] = Y[i]

            for a in range(A):
                SSY[a] += np.square(yp[a] - Y[i])
                YP[i, a + 1] = yp[a]

            if gui: updateProgress(float(i) / float(nobj))

        if gui: print()

        self.SSY = SSY
        self.SDEP = [np.sqrt(i / nobj) for i in SSY]
        self.Q2 = [1.00 - (i / SSY0) for i in SSY]

        self.Av = A

        return (YP)
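A hedged end-to-end sketch combining build and validateLOO; the pls class name comes from the file name above, the rest is an assumption:

# Hypothetical driver; X (nobj x nvarx) and Y (nobj,) are numpy arrays.
model = pls()
model.build(X, Y, targetA=3)
YP = model.validateLOO(3)
print(model.SDEP, model.Q2)  # per-LV cross-validation statistics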
Example #8
from scale import scale
from users.models import User
from django.contrib.sessions.models import Session
from django.utils import timezone

s = scale()

# example data - no need for separate file - will eventually just query docker to see what values are available.
challengeID = 3
registeredUsers = 10
activeSessions = 10
activeContainers = 10
minimumContainers = 2

try:
    buf = s.calculateBuffer(registeredUsers, activeSessions, minimumContainers,
                            activeContainers, challengeID)

except Exception as ex:
    print(ex)

else:
    if buf == activeContainers:
        print("buffer ({0}) is exactly the # of active containers ({1})".format(
            buf, activeContainers))

    elif buf < activeContainers:
        print(
            "calculated buffer ({0}) is less than the # of active containers ({1}) \n\r{2} containers required (+1 to account for rounding calculation)"
            .format(buf, activeContainers, buf - activeContainers + 1)
        )  # +1 accounts for rounding up on the buffer calculation while still erring on the side of caution when removing containers
Example #9
File: pls.py Project: tljm/eTOXlab
    def build(self, X, Y, targetA=0, targetSSX=0.0, autoscale=False):
        """Build a new PLS model with the X and Y numpy matrice provided using NIPALS algorithm

           The dimensionality of the model can be defined either providing
           1. directly the number of LV to extract (targetA)
           2. the fraction of SSX that the model will explain (targetSSX)

           The X and Y matrices are centered but no other scaling transform is applied

           Does not return anything, but updates internals vectors and variables
        """
        nobj, nvarx = np.shape(X)

        ##        for i in range (nobj):
        ##            for j in range (nvarx):
        ##                print X[i,j],
        ##            print

        self.nobj = nobj
        self.nvarx = nvarx
        self.X = X.copy()
        self.Y = Y.copy()

        self.X, self.mux = center(self.X)
        self.Y, self.muy = center(self.Y)
        self.X, self.wgx = scale(self.X, autoscale)

        ##        self.mux = mux
        ##        self.muy = muy
        ##        self.wgx = wgx

        self.autoscale = autoscale

        SSXac = 0.0
        SSYac = 0.0

        SSX0, SSY0, null = self.computeSS(self.X, self.Y)

        SSXold = SSX0
        SSYold = SSY0

        a = 0
        while True:
            t, p, w, c = self.extractLV(self.X, self.Y)

            self.t.append(t)
            self.p.append(p)
            self.w.append(w)
            self.c.append(c)

            self.X, self.Y = self.deflateLV(self.X, self.Y, t, p, c)

            SSXnew, SSYnew, dmodx = self.computeSS(self.X, self.Y)

            SSXex = (SSXold - SSXnew) / SSX0
            SSXac += SSXex

            SSYex = (SSYold - SSYnew) / SSY0
            SSYac += SSYex

            SDEC = np.sqrt(SSYnew / nobj)

            dof = nvarx - a
            if dof <= 0: dof = 1
            dmodx = [np.sqrt(d / dof) for d in dmodx]

            SSXold = SSXnew
            SSYold = SSYnew

            self.SSXex.append(SSXex)
            self.SSXac.append(SSXac)
            self.SSYex.append(SSYex)
            self.SSYac.append(SSYac)
            self.SDEC.append(SDEC)
            self.dmodx.append(dmodx)

            a += 1

            if targetA > 0:
                if a == targetA: break

            if targetSSX > 0.0:
                if SSXac > targetSSX: break
                # prevent extracting a meaningless number of LVs
                if a > min(20, nobj / 5): break

        self.Am = a

        # NIPALS is destructive, so we must retrieve X and Y from original data for validation
        self.X = X.copy()
        self.Y = Y.copy()

        self.cutoff = np.zeros(self.Am, dtype=np.float64)
        self.TP = np.zeros(self.Am)
        self.TN = np.zeros(self.Am)
        self.FP = np.zeros(self.Am)
        self.FN = np.zeros(self.Am)

        self.TPpred = np.zeros(self.Am)
        self.TNpred = np.zeros(self.Am)
        self.FPpred = np.zeros(self.Am)
        self.FNpred = np.zeros(self.Am)
Example #10
            type_assign = dict((k, []) for k in TET)
            for k, m in zip(TET, ea):
                type_assign[k] = m

            for e in TG.edges(data=True):
                ty = e[2]['type']
                for k in type_assign:
                    if ty == k or (ty[1], ty[0]) == k:
                        e[2]['cifname'] = type_assign[k]

            ea_dict = assign_node_vecs2edges(TG, unit_cell, SYMMETRY_TOL)
            all_SBU_coords = SBU_coords(TG, ea_dict,
                                        CONNECTION_SITE_BOND_LENGTH)
            sc_a, sc_b, sc_c, sc_alpha, sc_beta, sc_gamma, sc_covar, Bstar_inv, max_length, callbackresults, ncra, ncca = scale(
                all_SBU_coords, a, b, c, ang_alpha, ang_beta, ang_gamma,
                max_le, num_vertices, Bstar, alpha, num_edges, FIX_UC,
                SCALING_ITERATIONS, PRE_SCALE, SCALING_CONVERGENCE_TOLERANCE,
                SCALING_STEP_SIZE)

            print('*******************************************')
            print('The scaled unit cell parameters are : ')
            print('*******************************************')
            print('a    :', np.round(sc_a, 5))
            print('b    :', np.round(sc_b, 5))
            print('c    :', np.round(sc_c, 5))
            print('alpha:', np.round(sc_alpha, 5))
            print('beta :', np.round(sc_beta, 5))
            print('gamma:', np.round(sc_gamma, 5))
            print()

            for sc, name in zip((sc_a, sc_b, sc_c), ('a', 'b', 'c')):
Example #11
except AssertionError:
    ordinals = ["st", "nd", "rd"] + ["th"] * 7
    formatted_num = f"{len(inputs) + 1}{ordinals[len(inputs) % 10]}"
    total_score = int(total_score) if round(
        total_score) == total_score else total_score
    print(
        f"Error: the {formatted_num} input is not in range (0 - {total_score})."
    )
    sys.exit(1)

display.display_info([x * total_score for x in inputs],
                     header="input data statistics",
                     total_score=total_score)

print()
scale_mean = display.input_float(
    prompt="What is the target average (mean) percentage (0 - 100)> ",
    qualifier=lambda x: 0 <= x <= 100,
    qualifier_err="Error: please input a number between 0 and 100.") / 100

outputs = scale.scale(inputs, scale_mean)

print()
print("Below are the scaled scores (in the order they were entered):")
print("\n".join("{:.2f}".format(total_score * n) for n in outputs))

print()
display.display_info([x * total_score for x in outputs],
                     header="output data statistics",
                     total_score=total_score)
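scale.scale(inputs, scale_mean) evidently maps the fractional scores so that their mean lands on the requested target; one plausible implementation, offered purely as an assumption:

def scale_to_mean(values, target_mean):
    # Multiply every score by the ratio needed to move the mean onto the target.
    current = sum(values) / len(values)
    return [v * target_mean / current for v in values]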
Example #13
    def __init__(self, tgt_name, test_size=0.15):
        self.test_size = test_size
        self.over_sampler = 'None'
        self.resampled = False
        self.tgt_name = tgt_name
        self.pat_frame = gbl.pat_frame.copy()
        self.con_frame = gbl.con_frame.copy()

        self.pat_frame_stats = gbl.pat_frame_stats.copy()

        self.n_rand_feat = 0 #copy(gbl.n_rand_feat)

        self.num_pats = self.pat_frame.shape[0]
        self.pat_names = self.pat_frame.index.tolist()
        self.num_cons = self.con_frame.shape[0]

        # read in target vector according to tgt variable
        self.y_tgt = pd.DataFrame({tgt_name: self.pat_frame_stats.loc[:, tgt_name]})
        if 'class' in tgt_name:
            self.tgt_task = gbl.clf
            self.y_strat = self.y_tgt.copy()
        else:
            self.tgt_task = gbl.reg
            y_strat_name = 'YBOCS_class3'
            self.y_strat = pd.DataFrame({y_strat_name: self.pat_frame_stats.loc[:, y_strat_name]})

        # extract train and test set names

        # pat_names_train, pat_names_test = train_test_split(pat_names,
        #                                                    test_size=self.test_size,
        #                                                    stratify=y_clf)
        #                                                    #random_state=random.randint(1, 101))
        self.pat_names_bins = {}
        self.pat_names_test_bins = {}
        self.pat_names_train_bins = {}
        self.bin_keys = np.unique(self.y_strat.iloc[:, 0])
        self.num_bins = len(self.bin_keys)
        self.multiclass = False
        if self.tgt_task == 'clf' and self.num_bins > 2:
            self.multiclass = True

        self.pat_names_test, self.pat_names_train = self._split_test_train_names(self.y_strat, self.test_size,
                                                                                 self.num_bins)

        # check if test and train names are mutually exclusive and add up to total observations
        result = any(elem in self.pat_names_train for elem in self.pat_names_test)
        print('%s: test/train %d/%d from %d' % (self.tgt_name, len(self.pat_names_test), len(self.pat_names_train),
                                                self.y_strat.shape[0]))
        if not set(self.pat_names) == set(self.pat_names_test + self.pat_names_train) or result:
            print('%s: error separating train test' % (tgt_name))
            exit()

        self.cv_folds = 10
        # assign train and test
        self.pat_frame_train = self.pat_frame.loc[self.pat_names_train, :]
        self.pat_frame_train_y = self.y_tgt.loc[self.pat_names_train, :]

        self.pat_frame_test = self.pat_frame.loc[self.pat_names_test, :]
        self.pat_frame_test_y = self.y_tgt.loc[self.pat_names_test, :]

        # save base copies from train for different oversampling
        self._pat_frame_train_base = self.pat_frame_train.copy(deep=True)
        self._pat_frame_train_y_base = self.pat_frame_train_y.copy(deep=True)

        #  scale data
        self.pat_frame_train_norm, self.pat_train_scaler = scale(self.pat_frame_train)
        self.pat_frame_test_norm = test_set_scale(self.pat_frame_test, self.pat_train_scaler)

        # if self.tgt_task == 'reg' leave it else check if imbalanced classes
        self.imbalanced_classes = False
        if self.tgt_task == 'clf':
            # check if any classes have more than 1 std away from mean number of observations per class
            if any(abs(self.y_tgt.iloc[:, 0].value_counts() - np.mean(self.y_tgt.iloc[:, 0].value_counts())) >
                   np.std(self.y_tgt.iloc[:, 0].value_counts())):
                self.imbalanced_classes = True

        # con
        self.con_frame_norms, self.con_scalers = scale(self.con_frame)
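Here scale returns both the normalized frame and the fitted scaler, and test_set_scale reuses that scaler on held-out data. A minimal sketch of such a pair built on scikit-learn, as an assumption (the project's own helpers are not shown):

from sklearn.preprocessing import StandardScaler

def scale(frame):
    scaler = StandardScaler().fit(frame)
    return scaler.transform(frame), scaler

def test_set_scale(frame, scaler):
    # Reuse the training scaler so test-set statistics never leak into scaling.
    return scaler.transform(frame)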
Example #14
    print("Created {0}".format(args.outdir))

# Read data from csv and filter only the required columns and remove empty or NaN fields
filter_names = ['Distance', 'ArrDelay', 'CRSDepTime', 'DayOfWeek', 'DepTime']
data = pd.read_csv(args.filename, delimiter=',', usecols=filter_names).dropna(axis=0)

covariate = pd.DataFrame({'Late':     data.ArrDelay.apply(cov.calculate_late),
                          'Night':    data.CRSDepTime.apply(cov.calculate_night),
                          'Weekend':  data.DayOfWeek.apply(cov.calculate_weekend),
                          'DepHour':  data.DepTime.apply(cov.calculate_dephour),
                          'Distance': data.Distance.apply(cov.calculate_distance)
                          }).values

Y = covariate[:,0]
X = covariate[:,1:covariate.shape[1]]
X_tr, X_ts, Y_tr, Y_ts = train_test_split(X, Y, test_size=(1-args.ratio), random_state=4)
# Scale datasets
scaling = scale.scale(args.scaler, X_tr)
Xtr_scale = scaling.transform(X_tr)
Xts_scale = scaling.transform(X_ts)
# Add bias
Xtr_scale = np.insert(Xtr_scale, 0, 1.0, axis=1)
Xts_scale = np.insert(Xts_scale, 0, 1.0, axis=1)

Y_tr = Y_tr.reshape((Y_tr.shape[0], 1))
Y_ts = Y_ts.reshape((Y_ts.shape[0], 1))
util.write_csv(args.outdir + '/X_train.csv', 'x', Xtr_scale, args.precision)
util.write_csv(args.outdir + '/Y_train.csv', 'y', Y_tr, args.precision)
util.write_csv(args.outdir + '/X_test.csv', 'x', Xts_scale, args.precision)
util.write_csv(args.outdir + '/Y_test.csv', 'y', Y_ts, args.precision)
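scale.scale(args.scaler, X_tr) returns a fitted object with a transform method, which reads like a name-keyed scaler factory; a hedged sketch of that reading:

from sklearn.preprocessing import MinMaxScaler, StandardScaler

def scale(name, X):
    # Hypothetical factory: pick a scaler by name and fit it on the training data.
    scalers = {'standard': StandardScaler(), 'minmax': MinMaxScaler()}
    return scalers[name].fit(X)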
Example #15
import pyaudio
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation

from scale import scale

FORMAT = pyaudio.paInt16
CHANNELS = 1
CHUNK = 2000
RATE = 44100

a = scale()

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.set_xscale('log')
ax.set_xlim((a.freqs[0], a.freqs[-1]))
ax.set_ylim((0, 1500))
ax.set_xticks(a.freqs[2:-2])
ax.set_xticklabels(a.notes[2:-2])
line, = ax.plot([], [], c='k', lw=1)


def init():
    line.set_data([], [])
    return line,


def animate(i):
    data = np.frombuffer(stream.read(CHUNK), dtype=np.int16)
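The excerpt is truncated here, and stream is never opened in the code shown. A hedged completion of the missing driver:

# Assumed continuation: open the microphone stream and run the animation.
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                input=True, frames_per_buffer=CHUNK)
ani = animation.FuncAnimation(fig, animate, init_func=init,
                              interval=20, blit=True)
plt.show()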
Example #16
def run_template(template):

    print()
    print(
        '========================================================================================================='
    )
    print('template :', template)
    print(
        '========================================================================================================='
    )
    print()

    cat_count = 0
    for net in ct2g(template):

        cat_count += 1
        TG, start, unit_cell, TVT, TET, TNAME, a, b, c, ang_alpha, ang_beta, ang_gamma, max_le, catenation = net

        node_cns = [(cncalc(node, 'nodes', ONE_ATOM_NODE_CN), node)
                    for node in os.listdir('nodes')]

        print('Number of vertices = ', len(TG.nodes()))
        print('Number of edges = ', len(TG.edges()))
        print()

        if PRINT:

            print('There are', len(TG.nodes()),
                  'vertices in the voltage graph:')
            print()
            v = 0

            for node in TG.nodes():
                v += 1
                print(v, ':', node)
                node_dict = TG.node[node]
                print('type : ', node_dict['type'])
                print('cartesian coords : ', node_dict['ccoords'])
                print('fractional coords : ', node_dict['fcoords'])
                print('degree : ', node_dict['cn'][0])
                print()

            print('There are', len(TG.edges()), 'edges in the voltage graph:')
            print()

            for edge in TG.edges(data=True, keys=True):
                edge_dict = edge[3]
                ind = edge[2]
                print(ind, ':', edge[0], edge[1])
                print('length : ', edge_dict['length'])
                print('type : ', edge_dict['type'])
                print('label : ', edge_dict['label'])
                print('positive direction :', edge_dict['pd'])
                print('cartesian coords : ', edge_dict['ccoords'])
                print('fractional coords : ', edge_dict['fcoords'])
                print()

        vas = vertex_assign(TG, TVT, node_cns, unit_cell, ONE_ATOM_NODE_CN,
                            USER_SPECIFIED_NODE_ASSIGNMENT, SYMMETRY_TOL,
                            ALL_NODE_COMBINATIONS)
        CB, CO = cycle_cocyle(TG)

        for va in vas:
            if len(va) == 0:
                print(
                    'At least one vertex does not have a building block with the correct number of connection sites.'
                )
                print('Moving to the next template...')
                print()
                continue

        if len(CB) != (len(TG.edges()) - len(TG.nodes()) + 1):
            print('The cycle basis is incorrect.')
            print(
                'The number of cycles in the cycle basis does not equal the rank of the cycle space.'
            )
            print('Moving to the next template...')
            continue

        num_edges = len(TG.edges())
        Bstar, alpha = Bstar_alpha(CB, CO, TG, num_edges)

        if PRINT:
            print(
                'B* (top) and alpha (bottom) for the barycentric embedding are:'
            )
            print()
            for i in Bstar:
                print(i)
            print()
            for i in alpha:
                print(i)
            print()

        num_vertices = len(TG.nodes())

        if COMBINATORIAL_EDGE_ASSIGNMENT:
            eas = list(
                itertools.product([e for e in os.listdir('edges')],
                                  repeat=len(TET)))
        else:
            edge_files = sorted([e for e in os.listdir('edges')])
            eas = []
            i = 0
            while len(eas) < len(TET):
                eas.append(edge_files[i])
                i += 1
                if i == len(edge_files):
                    i = 0
            eas = [eas]

        g = 0

        for va in vas:

            node_elems = [bbelems(i[1], 'nodes') for i in va]
            metals = [[i for i in j if i in metal_elements]
                      for j in node_elems]
            metals = list(set([i for j in metals for i in j]))

            v_set = [('v' + str(vname_dict[re.sub('[0-9]', '', i[0])]), i[1])
                     for i in va]
            v_set = sorted(list(set(v_set)), key=lambda x: x[0])
            v_set = [v[0] + '-' + v[1] for v in v_set]

            print(
                '++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++'
            )
            print('vertex assignment : ', v_set)
            print(
                '++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++'
            )
            print()

            if SINGLE_METAL_MOFS_ONLY and len(metals) != 1:
                print(
                    v_set,
                    'contains no metals or multiple metal elements, no cif will be written'
                )
                print()
                continue

            for v in va:
                for n in TG.nodes(data=True):
                    if v[0] == n[0]:
                        n[1]['cifname'] = v[1]

            for ea in eas:

                g += 1

                print(
                    '++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++'
                )
                print('edge assignment : ', ea)
                print(
                    '++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++'
                )
                print()

                type_assign = dict((k, []) for k in TET)
                for k, m in zip(TET, ea):
                    type_assign[k] = m

                for e in TG.edges(data=True):
                    ty = e[2]['type']
                    for k in type_assign:
                        if ty == k or (ty[1], ty[0]) == k:
                            e[2]['cifname'] = type_assign[k]

                ea_dict = assign_node_vecs2edges(TG, unit_cell, SYMMETRY_TOL)

                all_SBU_coords = SBU_coords(TG, ea_dict,
                                            CONNECTION_SITE_BOND_LENGTH)
                sc_a, sc_b, sc_c, sc_alpha, sc_beta, sc_gamma, sc_covar, Bstar_inv, max_length, callbackresults, ncra, ncca, scaling_data = scale(
                    all_SBU_coords, a, b, c, ang_alpha, ang_beta, ang_gamma,
                    max_le, num_vertices, Bstar, alpha, num_edges, FIX_UC,
                    SCALING_ITERATIONS, PRE_SCALE,
                    SCALING_CONVERGENCE_TOLERANCE, SCALING_STEP_SIZE)

                print('*******************************************')
                print('The scaled unit cell parameters are : ')
                print('*******************************************')
                print('a    :', np.round(sc_a, 5))
                print('b    :', np.round(sc_b, 5))
                print('c    :', np.round(sc_c, 5))
                print('alpha:', np.round(sc_alpha, 5))
                print('beta :', np.round(sc_beta, 5))
                print('gamma:', np.round(sc_gamma, 5))
                print()

                cflag = False
                for sc, name in zip((sc_a, sc_b, sc_c), ('a', 'b', 'c')):
                    if sc < 1.0:
                        print('unit cell parameter', name,
                              'has collapsed during scaling!')
                        print(
                            'try re-running with', name,
                            'fixed, with a larger value for PRE_SCALE, or with a higher SCALING_CONVERGENCE_TOLERANCE'
                        )
                        print('no cif will be written')
                        cflag = True

                if cflag:
                    continue

                scaled_params = [sc_a, sc_b, sc_c, sc_alpha, sc_beta, sc_gamma]

                sc_Alpha = np.r_[alpha[0:num_edges - num_vertices + 1, :],
                                 sc_covar]
                sc_omega_plus = np.dot(Bstar_inv, sc_Alpha)

                ax = sc_a
                ay = 0.0
                az = 0.0
                bx = sc_b * np.cos(sc_gamma * pi / 180.0)
                by = sc_b * np.sin(sc_gamma * pi / 180.0)
                bz = 0.0
                cx = sc_c * np.cos(sc_beta * pi / 180.0)
                cy = (sc_c * sc_b * np.cos(sc_alpha * pi / 180.0) -
                      bx * cx) / by
                cz = (sc_c**2.0 - cx**2.0 - cy**2.0)**0.5
                sc_unit_cell = np.asarray([[ax, ay, az], [bx, by, bz],
                                           [cx, cy, cz]]).T

                scaled_coords = omega2coords(
                    start, TG, sc_omega_plus,
                    (sc_a, sc_b, sc_c, sc_alpha, sc_beta, sc_gamma),
                    num_vertices, template, g, WRITE_CHECK_FILES)
                nvecs, evecs = scaled_node_and_edge_vectors(
                    scaled_coords, sc_omega_plus, sc_unit_cell, ea_dict)
                placed_nodes, node_bonds = place_nodes(
                    nvecs, CHARGES, ORIENTATION_DEPENDENT_NODES)
                placed_edges, edge_bonds = place_edges(evecs, CHARGES,
                                                       len(placed_nodes))

                if RECORD_CALLBACK:

                    vnames = '_'.join([v.split('.')[0] for v in v_set])

                    if len(ea) <= 5:
                        enames = '_'.join([e[0:-4] for e in ea])
                    else:
                        enames = str(len(ea)) + '_edges'

                    prefix = template[0:-4] + '_' + vnames + '_' + enames

                    frames = scaling_callback_animation(
                        callbackresults, alpha, Bstar_inv, ncra, ncca,
                        num_vertices, num_edges, TG, template, g, False)
                    write_scaling_callback_animation(frames, prefix)
                    animate_objective_minimization(callbackresults, prefix)

                if PLACE_EDGES_BETWEEN_CONNECTION_POINTS:
                    placed_edges = adjust_edges(placed_edges, placed_nodes,
                                                sc_unit_cell)

                placed_all = placed_nodes + placed_edges
                bonds_all = node_bonds + edge_bonds

                if WRITE_CHECK_FILES:
                    write_check_cif(template, placed_nodes, placed_edges, g,
                                    scaled_params, sc_unit_cell)

                if SINGLE_ATOM_NODE or NODE_TO_NODE:
                    placed_all, bonds_all = remove_Fr(placed_all, bonds_all)

                print('computing X-X bonds...')
                print()
                print('*******************************************')
                print('Bond formation : ')
                print('*******************************************')

                fixed_bonds, nbcount, bond_check = bond_connected_components(
                    placed_all, bonds_all, sc_unit_cell, max_length, BOND_TOL,
                    TRACE_BOND_MAKING, NODE_TO_NODE, EXPANSIVE_BOND_SEARCH,
                    ONE_ATOM_NODE_CN)

                print('there were ', nbcount, ' X-X bonds formed')

                if bond_check:
                    print('bond check passed')
                    bond_check_code = ''
                else:
                    print(
                        'bond check failed, attempting distance search bonding...'
                    )
                    fixed_bonds, nbcount = distance_search_bond(
                        placed_all, bonds_all, sc_unit_cell, 2.5,
                        TRACE_BOND_MAKING)
                    bond_check_code = '_BOND_CHECK'
                    print('there were', nbcount, 'X-X bonds formed')
                print()

                if CHARGES:
                    fc_placed_all, netcharge, onetcharge, rcb = fix_charges(
                        placed_all)
                else:
                    fc_placed_all = placed_all

                fixed_bonds = fix_bond_sym(fixed_bonds, placed_all,
                                           sc_unit_cell)

                if CHARGES:
                    print('*******************************************')
                    print('Charge information :                       ')
                    print('*******************************************')
                    print('old net charge                  :',
                          np.round(onetcharge, 5))
                    print('rescaling magnitude             :',
                          np.round(rcb, 5))

                    remove_net = choice(range(len(fc_placed_all)))
                    fc_placed_all[remove_net][4] -= np.round(netcharge, 4)

                    print('new net charge (after rescaling):',
                          np.sum([li[4] for li in fc_placed_all]))
                    print()

                vnames = '_'.join([v.split('.')[0] for v in v_set])

                if len(ea) <= 5:
                    enames = []
                    for e in [e[0:-4] for e in ea]:
                        if e not in enames:
                            enames.append(e)
                    enames = '_'.join(enames)

                else:
                    enames = str(len(ea)) + '_edges'

                if catenation:
                    cifname = template[
                        0:
                        -4] + '_' + vnames + '_' + enames + bond_check_code + '_' + 'CAT' + str(
                            cat_count) + '.cif'
                else:
                    cifname = template[
                        0:
                        -4] + '_' + vnames + '_' + enames + bond_check_code + '.cif'

                if WRITE_CIF:
                    print('writing cif...')
                    print()
                    if len(cifname) > 255:
                        cifname = cifname[0:241] + '_truncated.cif'
                    write_cif(fc_placed_all, fixed_bonds, scaled_params,
                              sc_unit_cell, cifname, CHARGES)

    if catenation and MERGE_CATENATED_NETS:

        print('merging catenated cifs...')
        cat_cifs = glob.glob('output_cifs/*_CAT*.cif')

        for comb in itertools.combinations(cat_cifs, cat_count):

            builds = [name[0:-9] for name in comb]

            print(set(builds))

            if len(set(builds)) != 1:
                continue

            merge_catenated_cifs(comb, CHARGES)

        for cif in cat_cifs:
            os.remove(cif)
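The block above that assembles sc_unit_cell is the standard conversion from cell parameters (a, b, c, alpha, beta, gamma) to Cartesian cell vectors, with a along x and b in the xy plane. The same construction as a standalone helper, for reference:

import numpy as np

def cell_to_cartesian(a, b, c, alpha, beta, gamma):
    # Angles in degrees; the columns of the result are the a, b and c vectors.
    al, be, ga = np.radians([alpha, beta, gamma])
    bx, by = b * np.cos(ga), b * np.sin(ga)
    cx = c * np.cos(be)
    cy = (c * b * np.cos(al) - bx * cx) / by
    cz = np.sqrt(c**2 - cx**2 - cy**2)
    return np.asarray([[a, 0.0, 0.0], [bx, by, 0.0], [cx, cy, cz]]).T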
Example #17
def uni_uc3(this, utc, it):
    retn = ""
    for i in range(len(this)):
        if i % 2 != 0: retn += scale(10, 15, ord(this[i]) + utc - i) + ","
        else:          retn += scale(10, 12, ord(this[i]) - it + i) + ","
    return retn[0:-1]
Example #18
    'does not contain many links. Try to add some.',
    'lacks images. Visual representation can be better than a lot of text.',
    'has short or no usage/examples section. This section is really important.',
    'has short or no documentation section. Documentation is almost more important than code itself.',
    'does not tell a user how to install your package. It\'s quite an obstacle, isn\'t it?',
    'lacks some support links such as additional resources or community links. Some guiding can help a user to use your package.',
    'lacks informative badges (you know, for example from build status services). They can tell something about your package and users appreciate that.',
    'xxx',
    'does not contain all additional important sections. They are "license", "authors" and "troubleshooting".'
])

SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))

all_features = np.array(sys.argv[-1].split(','), dtype=int)

x = np.array(scale(all_features)).reshape(1, -1)

scaler = joblib.load(SCRIPT_PATH + '/serialized/scaler.pkl')
clf = joblib.load(SCRIPT_PATH + '/serialized/classifier.pkl')

statistics = pd.read_csv(SCRIPT_PATH + '/../data/statistics-normalized.csv')

diffs = list()
for idx, value in enumerate(x[0]):
    diffs.append(statistics.values[1][idx] - value)

diffs[10] = -1  # don't deal with deprecation status
advice_idx = argmax(diffs, 3)

x = scaler.transform(x)
Example #19
    def build(self, X, Y, quantitative=False, autoscale=False,
               nestimators=0, features='', random=False, tune=False, class_weight="balanced",
               cv='loo', n=2, p=1, lc=True, vpath = ''):
        """Build a new RF model with the X and Y numpy matrices

        """

        nobj, nvarx = np.shape(X)

        self.nobj  = nobj
        self.nvarx = nvarx

        self.quantitative = quantitative
        self.autoscale = autoscale
        self.estimators = nestimators
        self.features = features
        self.random = random
        self.class_weight = class_weight
        self.learning_curve = lc
        self.n = n
        self.p = p
        self.cv = cv

        self.X = X.copy()
        self.Y = Y.copy()

        self.vpath = vpath

        #print self.vpath
        if autoscale:
            self.X, self.mux = center(self.X)
            self.X, self.wgx = scale(self.X, autoscale)

        if random:
            RANDOM_STATE = None
        else:
            RANDOM_STATE = 1226  # no reason to pick this number

        if self.cv:
            self.cv = getCrossVal(self.cv, RANDOM_STATE, self.n, self.p)
            
        if tune:
            self.estimators, self.features = self.optimize(self.X, self.Y)

            if self.features == 'none':
                self.features = None

        #print self.estimators

        if self.quantitative:
            print "Building Quantitative RF model"
            self.clf = RandomForestRegressor(n_estimators = int(self.estimators),
                                            warm_start=False,
                                            max_features=self.features,
                                            oob_score=True,
                                            random_state=RANDOM_STATE)
        else:
            print "Building Qualitative RF_model"
            self.clf = RandomForestClassifier(n_estimators = int(self.estimators),
                                            warm_start=False,
                                            max_features=self.features,
                                            oob_score=True,
                                            random_state=RANDOM_STATE,
                                            class_weight=self.class_weight)

        self.clf.fit(self.X, self.Y)
        
        if self.learning_curve:
            print('Building Learning Curves')
            title = "Learning Curves (RF)"
            # SVC is more expensive so we do a lower number of CV iterations:
            cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
            estimator = self.clf
            plot = plot_learning_curve(estimator, title, self.X, self.Y, (0.0, 1.01), cv=cv)
            plot.savefig(self.vpath+"/RF-learning_curves.png", format='png')
            plot.savefig("./RF-learning_curves.png", format='png')


        # Regenerate the X and Y, since they might have been centered/scaled
        self.X = X.copy()
        self.Y = Y.copy()
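A hedged usage sketch; the class name and argument choices are assumptions based on the method body (features=None lets the forest consider all features):

# Hypothetical driver; X (nobj x nvarx) and Y (nobj,) are numpy arrays.
model = RF()
model.build(X, Y, quantitative=True, nestimators=200, features=None, lc=False)
print(model.clf.oob_score_)  # out-of-bag estimate kept because oob_score=True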
Example #21
def main():
    root = tk.Tk()
    root.withdraw()

    confirm = tk.messagebox.askokcancel(
        'Confirmation',
        'Select the file you want to compare.',
        icon='question')
    if confirm:
        file_path = filedialog.askopenfilename()
    confirm = tk.messagebox.askokcancel(
        'Confirmation',
        'Select the directory in which to search for similar videos or images.',
        icon='question')
    if confirm:
        folder_selected = filedialog.askdirectory()
    files = os.listdir(folder_selected)

    similaridadeMinima = float(
        input('Enter the minimum similarity percentage: '))
    comparacoesMaximas = int(input('Enter the maximum number of comparisons: '))

    templateType = mimetypes.MimeTypes().guess_type(file_path)[0].split("/")[0]
    listaItemsComparacao = []
    listaSimilaridadeComparacao = []
    comparacoes = 0
    for itemFile in files:
        similaridade = 0
        itemType = mimetypes.MimeTypes().guess_type(folder_selected + "/" +
                                                    itemFile)[0]
        if itemType is not None and comparacoes < comparacoesMaximas:
            comparacoes = comparacoes + 1
            itemType = itemType.split("/")[0]

            if itemType == "image":
                img = cv2.imread(folder_selected + "/" + itemFile,
                                 cv2.IMREAD_COLOR)
                scaledImage = scale(max_height, max_width, img)
                if templateType == "image":
                    template = cv2.imread(file_path, cv2.IMREAD_COLOR)
                    template = scale(max_height, max_width, template)
                    similaridade = compareImages(template, scaledImage)
                elif templateType == "video":
                    similaridade = compareImageVideo(scaledImage, file_path)

            elif itemType == "video":
                video = folder_selected + "/" + itemFile
                if templateType == "image":
                    img = cv2.imread(file_path, cv2.IMREAD_COLOR)
                    scaledImage = scale(max_height, max_width, img)
                    similaridade = compareImageVideo(scaledImage, video)

                elif templateType == "video":
                    templateCap = cv2.VideoCapture(file_path)
                    similaridade = compareVideos(templateCap, video)
            similaridade = similaridade * 100
            if similaridade > similaridadeMinima:
                listaItemsComparacao.append(itemFile)
                listaSimilaridadeComparacao.append(
                    float("{0:.2f}".format(similaridade)
                          ) if similaridade > 0 else 0.0)
    data = {
        "Item": listaItemsComparacao,
        "Similaridade": listaSimilaridadeComparacao
    }
    df = pd.DataFrame(data)
    df.sort_values(by="Similaridade", ascending=False, inplace=True)
    print(df)
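In this snippet scale(max_height, max_width, img) caps an image's size before comparison; one plausible aspect-preserving implementation with OpenCV, again an assumption since the helper is not shown:

import cv2

def scale(max_height, max_width, img):
    # Shrink img so it fits within max_height x max_width, keeping aspect ratio.
    h, w = img.shape[:2]
    f = min(max_height / h, max_width / w, 1.0)
    return cv2.resize(img, (int(w * f), int(h * f)))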
Example #22
    def process(self):
        '''Main routine, chain together all of the steps imported from
        autoindex, integrate, pointgroup, scale and merge.'''

        try:
            hostname = os.environ['HOSTNAME'].split('.')[0]
            if version == 2:
                try:
                    write('Running on: %s' % hostname)
                except:
                    pass
            else:
                write('Running on: {}'.format(hostname))
        except Exception:
            pass

        # check input frame limits

        if self._first_image is not None:
            if self._metadata['start'] < self._first_image:
                start = self._metadata['start']
                self._metadata['start'] = self._first_image
                self._metadata['phi_start'] += self._metadata['phi_width'] * \
                                               (self._first_image - start)

        if self._last_image is not None:
            if self._metadata['end'] > self._last_image:
                self._metadata['end'] = self._last_image

        # first if the number of jobs was set to 0, decide something sensible.
        # this should be jobs of a minimum of 5 degrees, 10 frames.

        if self._n_jobs == 0:
            phi = self._metadata['oscillation'][1]

            if phi == 0.0:
                raise RuntimeError('grid scan data')

            wedge = max(10, int(round(5.0 / phi)))
            frames = self._metadata['end'] - self._metadata['start'] + 1
            n_jobs = int(round(frames / wedge))
            if self._max_n_jobs > 0:
                if n_jobs > self._max_n_jobs:
                    n_jobs = self._max_n_jobs
            self.set_n_jobs(n_jobs)

        if version == 2:
            try:
                write('Number of jobs: %d' % self._n_jobs)
                write('Number of cores: %d' % self._n_cores)
            except:
                pass
        else:
            write('Number of jobs: {}'.format(self._n_jobs))
            write('Number of cores: {}'.format(self._n_cores))

        step_time = time.time()

        if version == 2:
            try:
                write('Processing images: %d -> %d' %
                      (self._metadata['start'], self._metadata['end']))
            except:
                pass
        else:
            write('Processing images: {} -> {}'.format(self._metadata['start'],
                                                       self._metadata['end']))

        phi_end = self._metadata['phi_start'] + self._metadata['phi_width'] * \
                  (self._metadata['end'] - self._metadata['start'] + 1)
        if version == 2:
            try:
                write('Phi range: %.2f -> %.2f' %
                      (self._metadata['phi_start'], phi_end))

                write('Template: %s' % self._metadata['template'])
                write('Wavelength: %.5f' % self._metadata['wavelength'])
                write('Working in: %s' % os.getcwd())
            except:
                pass
        else:
            write('Phi range: {:.2f} -> {:.2f}'.format(
                self._metadata['phi_start'], phi_end))

            write('Template: {}'.format(self._metadata['template']))
            write('Wavelength: {:.5f}'.format(self._metadata['wavelength']))
            write('Working in: {}'.format(os.getcwd()))

        if self._plugin_library != " " and self._plugin_library != "None" and self._plugin_library != "none":
            oet = self._metadata['extra_text']
            et = None
            for line in oet.split('\n'):
                if line[0:4] != "LIB=":
                    if et is None:
                        et = line + "\n"
                    else:
                        et = et + line + "\n"
            if et is None:
                self._metadata[
                    'extra_text'] = "LIB=" + self._plugin_library + "\n"
            else:
                self._metadata[
                    'extra_text'] = et + "LIB=" + self._plugin_library + "\n"
        elif self._plugin_library in ("None", "none"):
            oet = self._metadata['extra_text']
            et = None
            for line in oet.split('\n'):
                if line[0:4] != "LIB=":
                    if et is None:
                        et = line + "\n"
                    else:
                        et = et + line + "\n"
            self._metadata['extra_text'] = et

        if version == 2:
            try:
                write('Extra commands: %s' % self._metadata['extra_text'])
            except:
                pass
        else:
            write('Extra commands: {}'.format(self._metadata['extra_text']))

        try:
            self._p1_unit_cell = autoindex(self._metadata,
                                           input_cell=self._input_cell_p1)
        except Exception as e:
            traceback.print_exc(file=open('fast_dp.error', 'w'))
            if version == 2:
                try:
                    write('Autoindexing error: %s' % e)
                except:
                    pass
            else:
                write('Autoindexing error: {}'.format(e))
            fdpelogpath = get_afilepath()
            fdpelogprefix = get_afileprefix()
            if fdpelogpath:
                if version == 2:
                    try:
                        try:
                            shutil.copyfile(
                                'fast_dp.error',
                                os.path.join(fdpelogpath,
                                             fdpelogprefix + 'fast_dp.error'))
                            write('Archived fast_dp.error to %s' %
                                  os.path.join(fdpelogpath, fdpelogprefix +
                                               'fast_dp.error'))
                        except:
                            write('fast_dp.error not archived to %s' %
                                  os.path.join(fdpelogpath, fdpelogprefix +
                                               'fast_dp.error'))
                    except:
                        pass
                else:
                    try:
                        shutil.copyfile(
                            'fast_dp.error',
                            os.path.join(fdpelogpath,
                                         fdpelogprefix + 'fast_dp.error'))
                        write('Archived fast_dp.error to {}'.format(
                            os.path.join(fdpelogpath,
                                         fdpelogprefix + 'fast_dp.error')))
                    except:
                        write('fast_dp.error not archived to {}'.format(
                            os.path.join(fdpelogpath,
                                         fdpelogprefix + 'fast_dp.error')))
            return

        try:
            mosaics = integrate(self._metadata, self._p1_unit_cell,
                                self._resolution_low, self._n_jobs,
                                self._n_cores)
            if version == 2:
                try:
                    write('Mosaic spread: %.2f < %.2f < %.2f' % tuple(mosaics))
                except:
                    pass
            else:
                write('Mosaic spread: {0[0]:.2f} < {0[1]:.2f} < {0[2]:.2f}'.
                      format(tuple(mosaics)))
        except RuntimeError as e:
            traceback.print_exc(file=open('fast_dp.error', 'w'))
            if version == 2:
                try:
                    write('Integration error: %s' % e)
                except:
                    pass
            else:
                write('Integration error: {}'.format(e))
            fdpelogpath = get_afilepath()
            fdpelogprefix = get_afileprefix()
            if fdpelogpath:
                if version == 2:
                    try:
                        try:
                            shutil.copyfile(
                                'fast_dp.error',
                                os.path.join(fdpelogpath,
                                             fdpelogprefix + 'fast_dp.error'))
                            write('Archived fast_dp.error to %s' %
                                  os.path.join(fdpelogpath, fdpelogprefix +
                                               'fast_dp.error'))
                        except:
                            write('fast_dp.error not archived to %s' %
                                  os.path.join(fdpelogpath, fdpelogprefix +
                                               'fast_dp.error'))
                    except:
                        pass
                else:
                    try:
                        shutil.copyfile(
                            'fast_dp.error',
                            os.path.join(fdpelogpath,
                                         fdpelogprefix + 'fast_dp.error'))
                        write('Archived fast_dp.error to {}'.format(
                            os.path.join(fdpelogpath,
                                         fdpelogprefix + 'fast_dp.error')))
                    except:
                        write('fast_dp.error not archived to {}'.format(
                            os.path.join(fdpelogpath,
                                         fdpelogprefix + 'fast_dp.error')))
            return

        try:

            # FIXME in here will need a mechanism to take the input
            # spacegroup, determine the corresponding pointgroup
            # and then apply this (or verify that it is allowed then
            # select)

            metadata = copy.deepcopy(self._metadata)

            cell, sg_num, resol = decide_pointgroup(
                self._p1_unit_cell,
                metadata,
                input_spacegroup=self._input_spacegroup)
            self._unit_cell = cell
            self._space_group_number = sg_num

            if not self._resolution_high:
                self._resolution_high = resol

        except RuntimeError as e:
            if version == 2:
                try:
                    write('Pointgroup error: %s' % e)
                except:
                    pass
            else:
                write('Pointgroup error: {}'.format(e))
            return

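        # scale the integrated data in the assigned pointgroup - scale()
        # also returns the refined beam centre in pixels, converted to
        # physical units below using the detector pixel sizes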
        try:
            self._unit_cell, self._space_group, self._nref, beam_pixels = \
                scale(self._unit_cell, self._metadata,
                      self._space_group_number, self._resolution_high,
                      self._resolution_low, self._n_jobs, self._n_cores)
            self._refined_beam = (self._metadata['pixel'][1] * beam_pixels[1],
                                  self._metadata['pixel'][0] * beam_pixels[0])

        except RuntimeError as e:
            if version == 2:
                try:
                    write('Scaling error: %s' % e)
                except:
                    pass
            else:
                write('Scaling error: {}'.format(e))
            return

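        # merge the scaled reflections, keep the statistics for the
        # JSON/XML reports, and archive the merged MTZ if an archive
        # path is configured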
        try:
            n_images = self._metadata['end'] - self._metadata['start'] + 1
            self._xml_results = merge()
            mtzlogpath = get_afilepath()
            mtzlogprefix = get_afileprefix()
            if mtzlogpath:
                if version == 2:
                    try:
                        try:
                            shutil.copyfile(
                                'fast_dp.mtz',
                                os.path.join(mtzlogpath,
                                             mtzlogprefix + 'fast_dp.mtz'))
                            write('Archived fast_dp.mtz to %s' % os.path.join(
                                mtzlogpath, mtzlogprefix + 'fast_dp.mtz'))
                        except:
                            write('fast_dp.mtz not archived to %s' %
                                  os.path.join(mtzlogpath,
                                               mtzlogprefix + 'fast_dp.mtz'))
                    except:
                        pass
                else:
                    try:
                        shutil.copyfile(
                            'fast_dp.mtz',
                            os.path.join(mtzlogpath,
                                         mtzlogprefix + 'fast_dp.mtz'))
                        write('Archived fast_dp.mtz to {}'.format(
                            os.path.join(mtzlogpath,
                                         mtzlogprefix + 'fast_dp.mtz')))
                    except:
                        write('fast_dp.mtz not archived to {}'.format(
                            os.path.join(mtzlogpath,
                                         mtzlogprefix + 'fast_dp.mtz')))
        except RuntimeError as e:
            if version == 2:
                try:
                    write('Merging error: %s' % e)
                except:
                    pass
            else:
                write('Merging error: {}'.format(e))
            return

        if version == 2:
            try:
                write('Merging point group: %s' % self._space_group)
                write('Unit cell: %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f' % \
                      self._unit_cell)

                duration = time.time() - step_time
                write('Processing took %s (%d s) [%d reflections]' %
                      (time.strftime('%Hh %Mm %Ss', time.gmtime(duration)),
                       duration, self._nref))
                write('RPS: %.1f' % (float(self._nref) / duration))
            except:
                pass
        else:
            write('Merging point group: {}'.format(self._space_group))
            write(
                'Unit cell: {0[0]:6.2f} {0[1]:6.2f} {0[2]:6.2f} {0[3]:6.2f} {0[4]:6.2f} {0[5]:6.2f}'
                .format(self._unit_cell))

            duration = time.time() - step_time
            write('Processing took {} ({:d} s) [{:d} reflections]'.format(
                time.strftime('%Hh %Mm %Ss', time.gmtime(duration)),
                int(duration), self._nref))

            write('RPS: {:.1f}'.format(float(self._nref) / duration))

        # write out json and xml
        for func in (output.write_json, output.write_ispyb_xml):
            func(self._commandline, self._space_group, self._unit_cell,
                 self._xml_results, self._start_image, self._refined_beam)
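The archive-or-warn step above is repeated four times with only the filename and message changing, each copy wrapped in a `version == 2` guard because `%`-style and `{}`-style formatting are kept side by side. A minimal sketch of a helper that could collapse the duplication, assuming `write`, `get_afilepath` and `get_afileprefix` behave as in the snippet; `archive_file` is a hypothetical name, not part of fast_dp:

import os
import shutil

def archive_file(filename):
    # sketch only: write(), get_afilepath() and get_afileprefix() are
    # assumed to work as in the snippet above - get_afilepath() returns
    # the archive directory, or something falsy when none is configured
    logpath = get_afilepath()
    if not logpath:
        return
    destination = os.path.join(logpath, get_afileprefix() + filename)
    try:
        shutil.copyfile(filename, destination)
        write('Archived %s to %s' % (filename, destination))
    except Exception:
        write('%s not archived to %s' % (filename, destination))

Because `%`-formatting behaves identically on Python 2 and 3, a helper like this would also make the `version == 2` branching unnecessary at each call site (e.g. `archive_file('fast_dp.error')`).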
Ejemplo n.º 23
0
    def build(self,
              X,
              Y,
              quantitative=False,
              autoscale=False,
              nestimators=0,
              features='',
              random=False,
              tune=False,
              class_weight="balanced",
              cv='loo',
              n=2,
              p=1,
              lc=True,
              vpath=''):
        """Build a new RF model with the X and Y numpy matrices

        """

        nobj, nvarx = np.shape(X)

        self.nobj = nobj
        self.nvarx = nvarx

        self.quantitative = quantitative
        self.autoscale = autoscale
        self.estimators = nestimators
        self.features = features
        self.random = random
        self.class_weight = class_weight
        self.learning_curve = lc
        self.n = n
        self.p = p
        self.cv = cv

        self.X = X.copy()
        self.Y = Y.copy()

        self.vpath = vpath

        if autoscale:
            self.X, self.mux = center(self.X)
            self.X, self.wgx = scale(self.X, autoscale)

        if random:
            RANDOM_STATE = None
        else:
            RANDOM_STATE = 1226  # no reason to pick this number

        if self.cv:
            self.cv = getCrossVal(self.cv, RANDOM_STATE, self.n, self.p)

        if tune:
            self.estimators, self.features = self.optimize(self.X, self.Y)

            if self.features == 'none':
                self.features = None

        if self.quantitative:
            print "Building Quantitative RF model"
            self.clf = RandomForestRegressor(n_estimators=int(self.estimators),
                                             warm_start=False,
                                             max_features=self.features,
                                             oob_score=True,
                                             random_state=RANDOM_STATE)
        else:
            print "Building Qualitative RF_model"
            self.clf = RandomForestClassifier(n_estimators=int(
                self.estimators),
                                              warm_start=False,
                                              max_features=self.features,
                                              oob_score=True,
                                              random_state=RANDOM_STATE,
                                              class_weight=self.class_weight)

        self.clf.fit(self.X, self.Y)

        if self.learning_curve:
            print('Building Learning Curves')
            title = "Learning Curves (RF)"
            # estimate the curve with a shuffled 10-split, 80/20 CV
            cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
            estimator = self.clf
            plot = plot_learning_curve(estimator,
                                       title,
                                       self.X,
                                       self.Y, (0.0, 1.01),
                                       cv=cv)
            plot.savefig(self.vpath + "/RF-learning_curves.png", format='png')
            plot.savefig("./RF-learning_curves.png", format='png')

        # Regenerate the X and Y, since they might have been centered/scaled
        self.X = X.copy()
        self.Y = Y.copy()
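For context, a minimal usage sketch of `build` with synthetic data. `RF` is a hypothetical name for the class this method belongs to, and `center`, `scale` and `getCrossVal` are assumed importable exactly as the method uses them:

import numpy as np

X = np.random.rand(50, 5)          # 50 objects, 5 descriptors
Y = np.random.randint(0, 2, 50)    # binary activity labels

model = RF()                       # hypothetical constructor
model.build(X, Y,
            quantitative=False,    # qualitative -> RandomForestClassifier
            autoscale=True,        # center and scale the descriptors
            nestimators=100,
            features='sqrt',       # forwarded to max_features
            lc=False)              # skip the learning-curve plot
print(model.clf.oob_score_)        # out-of-bag accuracy estimate

Note that the default `features=''` would reach sklearn as `max_features=''` and raise, so an explicit value (or `tune=True`) is passed here.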
Ejemplo n.º 24
0
    def reprocess(self):
        '''Main routine, chain together last few steps of processing i.e.
        pointgroup, scale and merge.'''

        try:
            hostname = os.environ['HOSTNAME'].split('.')[0]
            if version == 2:
                try:
                    write('Running on: %s' % hostname)
                except:
                    pass
            else:
                write('Running on: {}'.format(hostname))
        except Exception:
            pass

        # check input frame limits

        if self._first_image is not None:
            if self._metadata['start'] < self._first_image:
                start = self._metadata['start']
                self._metadata['start'] = self._first_image
                self._metadata['phi_start'] += self._metadata['phi_width'] * \
                                               (self._first_image - start)

        if self._last_image is not None:
            if self._metadata['end'] > self._last_image:
                self._metadata['end'] = self._last_image

        step_time = time.time()
        
        if version == 2:
            try:
                write('Processing images: %d -> %d' % (self._metadata['start'],
                                                       self._metadata['end']))
            except:
                pass
        else:
            write('Processing images: {} -> {}'.format(
                            self._metadata['start'], self._metadata['end']))

        phi_end = self._metadata['phi_start'] + self._metadata['phi_width'] * \
                  (self._metadata['end'] - self._metadata['start'] + 1)

        if version == 2:
            try:
                write('Phi range: %.2f -> %.2f' % (self._metadata['phi_start'],
                                                   phi_end))

                write('Template: %s' % self._metadata['template'])
                write('Wavelength: %.5f' % self._metadata['wavelength'])
                write('Working in: %s' % os.getcwd())
            except:
                pass
        else:
            write('Phi range: {:.2f} -> {:.2f}'.format(
                   self._metadata['phi_start'], phi_end))

            write('Template: {}'.format(self._metadata['template']))
            write('Wavelength: {:.5f}'.format(self._metadata['wavelength']))
            write('Working in: {}'.format(os.getcwd()))

        # just for information for the user, print all options for indexing
        # FIXME should be able to run the same from CORRECT.LP which would
        # work better....

        from xds_reader import read_xds_idxref_lp
        from cell_spacegroup import spacegroup_to_lattice

        results = read_xds_idxref_lp('IDXREF.LP')

        write('For reference, all indexing results:')
        if version == 2:
            try:
                write('%7s %6s %6s %6s %6s %6s %6s' % \
                      ('Lattice', 'a', 'b', 'c', 'alpha', 'beta', 'gamma'))
            except:
                pass
        else:
            write('{:7s} {:>6s} {:>6s} {:>6s} {:>6s} {:>6s} {:>6s}'.format(
                'Lattice', 'a', 'b', 'c', 'alpha', 'beta', 'gamma'))


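        # integer keys in the IDXREF results are candidate lattice
        # solutions; non-integer keys are skipped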
        for r in reversed(sorted(results)):
            if not isinstance(r, int):
                continue
            cell = results[r][1]
            if version == 2:
                try:
                    write('%7s %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f' % \
                            (spacegroup_to_lattice(r), cell[0], cell[1], cell[2],
                            cell[3], cell[4], cell[5]))
                except:
                    pass
            else:
                write('{:7s} {:6.2f} {:6.2f} {:6.2f} {:6.2f} {:6.2f} {:6.2f}'.format(
                                spacegroup_to_lattice(r), cell[0], cell[1], cell[2],
                                cell[3], cell[4], cell[5]))
        try:

            # FIXME in here will need a mechanism to take the input
            # spacegroup, determine the corresponding pointgroup
            # and then apply this (or verify that it is allowed then
            # select)

            metadata = copy.deepcopy(self._metadata)

            cell, sg_num, resol = decide_pointgroup(
                self._p1_unit_cell, metadata,
                input_spacegroup = self._input_spacegroup)
            self._unit_cell = cell
            self._space_group_number = sg_num

            if not self._resolution_high:
                self._resolution_high = resol

        except RuntimeError as e:
            if version == 2:
                try:
                    write('Pointgroup error: %s' % e)
                except:
                    pass
            else:
                write('Pointgroup error: {}'.format(e))
            return

        try:
            self._unit_cell, self._space_group, self._nref, beam_pixels = \
                scale(self._unit_cell, self._metadata,
                      self._space_group_number, self._resolution_high,
                      self._resolution_low, self._n_jobs, self._n_cores)
            self._refined_beam = (self._metadata['pixel'][1] * beam_pixels[1],
                                  self._metadata['pixel'][0] * beam_pixels[0])

        except RuntimeError as e:
            if version == 2:
                try:
                    write('Scaling error: %s' % e)
                except:
                    pass
            else:
                write('Scaling error: {}'.format(e))
            return

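        # re-merge to fast_rdp.mtz, writing the aimless log to a
        # separate file so the original processing logs are preserved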
        try:
            n_images = self._metadata['end'] - self._metadata['start'] + 1
            self._xml_results = merge(hklout='fast_rdp.mtz',
                                      aimless_log='aimless_rerun.log')
        except RuntimeError as e:
            if version == 2:
                try:
                    write('Merging error: %s' % e)
                except:
                    pass
            else:
                write('Merging error: {}'.format(e))
            return

        if version == 2:
            try:
                write('Merging point group: %s' % self._space_group)
                write('Unit cell: %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f' % \
                      self._unit_cell)

                duration = time.time() - step_time
                write('Reprocessing took %s (%d s) [%d reflections]' %
                      (time.strftime('%Hh %Mm %Ss',
                        time.gmtime(duration)), duration,
                        self._nref))
            except:
                pass
        else:
            write('Merging point group: {}'.format(self._space_group))
            write('Unit cell: {:6.2f} {:6.2f} {:6.2f} {:6.2f} {:6.2f} {:6.2f}'
                  .format(*self._unit_cell))

            duration = time.time() - step_time
            write('Reprocessing took {} ({:d} s) [{:d} reflections]'.format(
                time.strftime('%Hh %Mm %Ss', time.gmtime(duration)),
                int(duration), self._nref))

        # write out json and xml
        for func, filename in [ (output.write_json, 'fast_rdp.json'),
                                (output.write_ispyb_xml, 'fast_rdp.xml') ]:
          func(self._commandline, self._space_group,
               self._unit_cell, self._xml_results,
               self._start_image, self._refined_beam,
               filename=filename)
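One pitfall worth flagging in the two `write()` blocks above: the `%` operator unpacks a tuple automatically, but `str.format` does not, so a six-value unit cell must be star-unpacked (or indexed through a single positional argument, as example 22 does). A quick illustration with made-up cell constants:

cell = (78.84, 78.84, 38.29, 90.00, 90.00, 90.00)   # illustrative values

# %-formatting unpacks the tuple into the six fields
print('Unit cell: %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f' % cell)

# str.format needs the tuple star-unpacked ...
print('Unit cell: {:6.2f} {:6.2f} {:6.2f} {:6.2f} {:6.2f} {:6.2f}'
      .format(*cell))

# ... or indexed through one positional argument
print('Unit cell: {0[0]:6.2f} {0[1]:6.2f} {0[2]:6.2f} '
      '{0[3]:6.2f} {0[4]:6.2f} {0[5]:6.2f}'.format(cell))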