def project_solution(height, length, gs_x, gs_y, u_in, cc, au, av, ap, rho, mu):
    # Get Mesh
    u_grid, v_grid, p_grid = Preprocessing.gen_mesh(height, length, gs_x, gs_y)
    # Get Initial Values
    u, v, p = Preprocessing.initial_con(u_grid[0].shape[0], u_grid[1].shape[0], u=0., v=0., p=0.)
    # Get Boundary Conditions
    bc = Preprocessing.boundary_cond('velocity', 'velocity gradient', 'no slip', 'no slip',
                                     [u_in, None, None, None])
    # Create viewers
    p_viewer = Viewer.FlowContours(p, p_grid[0], p_grid[1], [0, 0, length, height], 'Pressure')
    x_v_viewer = Viewer.FlowContours(u, u_grid[0], u_grid[1], [0, 0, length, height], 'X Velocity')
    y_v_viewer = Viewer.FlowContours(v, v_grid[0], v_grid[1], [0, 0, length, height], 'Y Velocity')
    s = Solver.Solution(p, u, v, u_grid[0], v_grid[0], v_grid[1], u_grid[1], bc, cc, au, av, ap,
                        rho, mu, p_viewer, x_v_viewer, y_v_viewer)
    p = s.p_n[s.ni/2:-1, s.nj/2]
    dp = p[-1] - p[0]
    print 'dp/dx = ' + str(dp / (gs_x * (len(p) - 1)))
    print 'max U = ' + str(s.u_n[s.ni/2:-1, s.nj/2].max())
    print 'max V = ' + str(s.v_n[s.ni/2:-1, s.nj/2].max())
    Viewer.keep_open()
def oldNewDupes(sources=glob.glob("duplicate/sources/*.txt"),
                suspectDupe=glob.glob("duplicate/duplicates/*.txt")):
    dupesDictionary = {}
    for src in sources:
        duplicates = []
        with open(src, 'r', encoding='utf-8') as doc:
            string = doc.read()
        src_tokens = nltk.word_tokenize(Preprocessing.process(string))
        for dp in suspectDupe.copy():
            if src == dp:
                continue
            with open(dp, 'r', encoding='utf-8') as doc:
                dupe = doc.read()
            dupe = nltk.word_tokenize(Preprocessing.process(dupe))
            if jaccard(set(src_tokens), set(dupe)) > 0.9:
                duplicates.append(ntpath.basename(dp))
                suspectDupe.remove(dp)
                print(ntpath.basename(src))
                print(ntpath.basename(dp))
        # dupesDictionary is always initialised above, so the original
        # try/except around this assignment was dead code.
        dupesDictionary[ntpath.basename(src)] = duplicates
    return dupesDictionary
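# Note: the snippet above relies on a jaccard() helper that is not defined here.
# A minimal sketch of the standard Jaccard similarity (|A & B| / |A | B|) it
# presumably expects -- an assumption, not the original implementation:
def jaccard(set_a, set_b):
    """Jaccard similarity of two sets; returns 0.0 when both sets are empty."""
    if not set_a and not set_b:
        return 0.0
    return len(set_a & set_b) / len(set_a | set_b)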
def runEpisode(ale, agent, stepsRemaining):
    maxEpisodeDuration = 60 * 60 * 5  # Max game duration is 5 minutes, at 60 fps
    framesElapsed = 0
    totalEpisodeReward = 0
    ale_game_over = False

    screenObservation = ale.getScreenRGB()
    preprocessedObservation = Preprocessing.preprocessALEObservation(screenObservation, agent.inputHeight, agent.inputWidth)
    action = agent.startEpisode(preprocessedObservation)

    while not ale_game_over and framesElapsed < stepsRemaining and framesElapsed < maxEpisodeDuration:
        framesElapsed += 1
        reward = ale.act(action)
        totalEpisodeReward += reward
        if ale.game_over():
            ale_game_over = True
        screenObservation = ale.getScreenRGB()
        preprocessedObservation = Preprocessing.preprocessALEObservation(screenObservation, agent.inputHeight, agent.inputWidth)
        action = agent.stepEpisode(reward, preprocessedObservation)

    ale.reset_game()
    avgLoss = agent.endEpisode(0)
    return framesElapsed, totalEpisodeReward, avgLoss
def getImage():
    while True:
        if lib.ftrScanGetFrame(hDevice, pointer(pBuffer), None) == 1:
            # print("Done!\n\nWriting to file......\n")
            vect = bytearray(pBuffer.raw)
            outputIm = Image.new("RGB", (ImageSize.nWidth, ImageSize.nHeight))
            outputIm.putdata(vect)
            base_name = ('/home/samara/Documentos/TG/Amostras/Valter/Valter1'
                         + str(datetime.datetime.now()).replace(':', '_').replace('/', '_') + '.jpeg')
            outputIm.save(base_name)
            # image_64 = base64.encodestring(open(img, "rb").read())
            improveImage = Preprocessing.improveImage(base_name)
            skeletonization = Preprocessing.skeletonization(improveImage)
            createKeyPoints = Preprocessing.createKeyPoints(skeletonization)
            encryptFingerprint = Preprocessing.encryptFingerprint(createKeyPoints)
            print(encryptFingerprint)
            # teste = Preprocessing.webservice(encryptFingerprint)
            return encryptFingerprint  # the break after this return was unreachable
        else:
            PrintErrorMessage(lib.ftrScanGetLastError())
            sleep(0.2)
    print('System Terminate')
    lib.ftrScanCloseDevice(hDevice)
def predict_and_write(self, min_df, max_df, alp):
    # parameters renamed from min/max to avoid shadowing the builtins
    mnb = MultinomialNB(alpha=alp)
    train_data = Preprocessing.process_train()
    test_set = Preprocessing.process_test()
    x_test = test_set[1]
    id_data = test_set[0]
    x_all = train_data[:, 0]
    y_all = train_data[:, 1]

    vectorizer = TfidfVectorizer(min_df=min_df, max_df=max_df, ngram_range=(1, 1),
                                 stop_words='english', strip_accents='ascii')
    output = vectorizer.fit_transform(x_all)
    x_all = output[:, :]
    mnb.fit(x_all, y_all)

    x_test = vectorizer.transform(x_test.ravel())[:, :]
    y_pred = mnb.predict(x_test).ravel()
    y_pred = y_pred.reshape(len(y_pred), 1)
    y_pred = np.concatenate((id_data.reshape(len(id_data), 1), y_pred), axis=1)
    first = ["Id", "Category"]
    y_pred = np.concatenate((np.array(first).reshape(1, 2), y_pred), axis=0)
    np.savetxt("prediction.csv", y_pred, fmt="%s", delimiter=",")
    CSVChange.write()
    return
def preprocess_aalto_hand_data_sequences(
        data, time_per_frame=1 / 240., max_skipped_frames=30, remove_timestamp_column=True,
        min_seq_length=240, representation=Representations.REPRESENTATION_XYZ,
        reorder_columns=True, verbose=True):
    data, step_index = preprocess_aalto_hand_data(
        data, remove_timestamp_column=False, representation=representation,
        reorder_columns=reorder_columns, verbose=verbose, returnStepIndex=True)

    data = pre.assign_sequence_ids(data, time_per_frame, max_skipped_frames)
    print '%d. Added a sequence id column: Found %d sequences based on %.6fs per frame and a maximum gap of %d skipped frames.' \
        % (step_index, data['Sequence_id'].max(), time_per_frame, max_skipped_frames)
    step_index += 1

    if min_seq_length is not None:
        data = pre.remove_short_sequences(data, min_seq_length)
        data = data.reset_index()
        num_seqs_left = len(data.groupby('Sequence_id'))
        print '%d. Removed %d short sequences (shorter than %d frames), leaving %d sequences.' \
            % (step_index, data['Sequence_id'].max() - num_seqs_left, min_seq_length, num_seqs_left)
        step_index += 1

    if remove_timestamp_column:
        data = remove_time_column(data, step_index, verbose)
        step_index += 1

    return data
def SI():
    global svc_clf, P300_clf
    ind = random.randint(0, 25)
    print("num =", ind)
    Series = np.load("../npSave/Pavarisa280219R06.npy")[ind, 0, :, :]
    print(np.asarray(Series).shape)

    bb, a = pre.butter_bandpass(0.5, 30, 500, order=5)
    bandpassData = pre.lfilter(bb, a, Series)
    print(bandpassData.shape)

    KaiserData = []
    for i in range(8):
        tmp = pre.KaiserFil(bandpassData[i])
        KaiserData.append(tmp)

    phaseData = np.array([np.unwrap(np.angle(hilbert(i))) for i in bandpassData])
    powerData = np.array([np.abs(hilbert(i)) for i in bandpassData])
    aaa = np.ravel((phaseData, powerData))
    A = np.reshape(aaa, (1, -1))

    Seq = []
    output = svc_clf.decision_function(A)  # np.array([FeaturedData]))[0]
    for j in range(26):
        Seq.append([-output[0][j], j])
    Seq.sort()  # sort by score, most confident first

    SI_result = ''
    for t in Seq[0:9]:  # pick the 9 letters with the highest score
        SI_result = SI_result + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'[t[1]]  # map the index back to a letter
    print('Result:' + SI_result)
    return SI_result
def browse_file():
    global filename
    filename = filedialog.askopenfilename()
    print(filename)
    # Audio preprocessing
    pr.noise_reductionM1(filename)
def test_order_files_to_paires(self):
    self.assertEqual(
        Preprocessing.order_files_to_paires(["testfile35_1.fastq.gz", "testfile36_2.fastq.gz",
                                             "testfile36_1.fastq.gz", "testfile35_2.fastq.gz"]),
        [("testfile35_1.fastq.gz", "testfile35_2.fastq.gz"),
         ("testfile36_1.fastq.gz", "testfile36_2.fastq.gz")])
    with self.assertRaises(Exception) as context:
        Preprocessing.order_files_to_paires(["testfile35_1.fastq.gz", "testfile36_2.fastq.gz",
                                             "testfile36_1.fastq.gz"])
    self.assertTrue("File_list argument does not contain pair for '"
                    + "testfile35_1.fastq.gz" + "'" in str(context.exception))
def work(self):
    Prep.process(self.saveFilename)
    self.saveFilenameRes = 'imgResult.png'
    self.res = TI.KNNDigits(self.saveFilenameRes)
    self.result.set("Your number is " + str(self.res))
    # self.lblRight.labelText = self.result
    # self.lblRight.grid(row=0, column=0, sticky="nsew")
    self.displayPictureRight()
def upload():
    # Instantiate UploadForm from form.py
    form = UploadForm()
    docList = renderDocList()
    global fileList, wordList, mVec

    # If files were submitted, take each file name (secured by werkzeug),
    # check that the extension is .txt, and save the file to the test folder.
    if form.validate_on_submit():
        for files in form.file.data:
            filename = secure_filename(files.filename)
            file_ext = os.path.splitext(filename)[1]
            if file_ext != '.txt':
                flash('Format of file(s) uploaded is not allowed (.txt only), file submission canceled!', 'danger')
                return redirect(url_for('upload'))
            files.save('../test/' + filename)
        flash('File(s) added!', 'success')

        # New files were added, so the dictionary database is rebuilt.
        # Collect every file name into the fileList variable.
        fileList = []
        for root, dirs, files in os.walk('../test', topdown=False):
            for name in files:
                # Keep only the file name, without the directory part split off by '\'
                dir = os.path.join(root, name).split('\\')
                fileList.append(dir[1])

        contentList = []
        for name in fileList:
            # Read each file's content, then clean it, tokenize it, remove stopwords and stem it.
            content = Read.readfile('../test/' + name)
            content = Read.cleaning(content)
            content = Read.token(content)
            content = Preprocessing.stopwords(content)
            content = Preprocessing.stemming(content)
            # Collect all contents into one list
            contentList.append(content)

        # Build the vocabulary of words that occur in the documents
        wordList = []
        for content in contentList:
            for word in content:
                if word not in wordList:
                    wordList.append(word)

        # Build the term-count matrix: occurrences of each word in each document
        mVec = [[0 for x in range(len(wordList))] for y in range(len(fileList))]
        j = 0
        for content in contentList:
            for word in content:
                for i in range(len(wordList)):
                    if word == wordList[i]:
                        mVec[j][i] = mVec[j][i] + 1
            j = j + 1

        return redirect(url_for('upload'))

    return render_template('upload.html', form=form, docs=docList)
def prepare_faces():
    base_path = 'E:/BosphorusDB/ply/'
    persons = get_person_ids(base_path)
    for person in persons:
        print person
        person_path = base_path + person + '_filtered'
        s_save_path = person_path.replace('ply', 'npy').replace('_filtered', '')
        pr.convert_ply_to_npy(person_path, s_save_path)
def prepFiles(self, type):
    self.cleanRepository()
    tabFiles = []
    Prep.preprocess(self.saveFilename, type)
    for file in os.listdir(self.dirPrep):
        if file.endswith(".png"):
            tabFiles.append(self.dirPrep + '/' + file)
    return tabFiles
def solution_zscore(self, solution):
    new_solution = list()
    for index, x in enumerate(solution):
        new_x = (x - PRE.average(self.pre_processedX[index + 1])) / (
            PRE.standard_deviation(self.pre_processedX[index + 1]))
        new_solution.append(new_x)
    return new_solution
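# For reference, solution_zscore() above applies the usual z-score transform
# z = (x - mean) / std to each component. A self-contained illustration with the
# standard library (hypothetical data, independent of PRE's implementation):
import statistics

column = [2.0, 4.0, 6.0, 8.0]
mean = statistics.mean(column)
std = statistics.pstdev(column)  # population standard deviation
zscores = [(x - mean) / std for x in column]
print(zscores)  # values centred on zero with unit (population) variance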
def main():
    # Load Data
    train_data, test_data = prep.load_data()

    # Encode Categorical Variables
    x_train, y_train, x_test, id_test = prep.encode_categories(train_data, test_data)

    # Decompositions
    pca = PCA(n_components=5)
    ica = FastICA(n_components=5, max_iter=1000)
    tsvd = TruncatedSVD(n_components=5)
    gp = GaussianRandomProjection(n_components=5)
    sp = SparseRandomProjection(n_components=5, dense_output=True)
def describeLines(image, numPoints=8, radius=1, eps=1e-7):
    imageLines = preprocessing.getHorizontalImageLinesGray(image, 20)
    concattinatedImage = preprocessing.concatinateLines(imageLines)
    # compute the Local Binary Pattern representation of the image, and then
    # use the LBP representation to build the histogram of patterns
    lbp = feature.local_binary_pattern(concattinatedImage, numPoints, radius, method="default")
    (hist, _) = np.histogram(lbp, bins=256, range=(0, 256))
    # (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, numPoints + 3), range=(0, numPoints + 2))
    # normalize the histogram (eps guards against an all-zero histogram)
    hist = hist.astype("float")
    hist /= (hist.sum() + eps)
    # return the histogram of Local Binary Patterns
    return hist[0:255].tolist()
def plot_examples(path):
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    height = img.shape[0]
    width = img.shape[1]

    img_prewitt = Preprocessing.prewitt(img, width, height, True)
    img_roberts = Preprocessing.roberts(img, width, height, True)

    plt.imshow(img_prewitt, cmap='Greys')
    plt.title('Prewitt operator')
    plt.show()

    plt.imshow(img_roberts, cmap='Greys')
    plt.title('Roberts operator')
    plt.show()
def build_phoneme_object(pair_index, word_index, info):
    phonemes = []
    for phoneme_index in range(3, len(info[pair_index][word_index])):
        if info[pair_index][word_index][phoneme_index] == '':
            break
        start = 0 if phoneme_index == 3 else \
            Preprocessing.toInt(info[pair_index][word_index + 1][phoneme_index - 1])
        end = Preprocessing.toInt(info[pair_index][word_index + 1][phoneme_index])
        phoneme = {'phoneme': info[pair_index][word_index][phoneme_index],
                   'start': start,
                   'end': end}
        phonemes.append(phoneme)
    return phonemes
def train_regressor(filename):
    pipe = Pipeline([('reduce_dim', TruncatedSVD(n_components=70)),
                     ('regression', MLPRegressor(solver='lbfgs'))])
    param_grid = {'regression__hidden_layer_sizes': [(230,), (300,)],
                  'regression__alpha': [0.0001, 0.1, 0.01]}
    mlp = GridSearchCV(pipe, param_grid, cv=10)
    features, labels, sparse_encoder, int_encoder = Preprocessing.feature_extraction_regression_train(filename)
    mlp.fit(features, labels)
    return mlp, sparse_encoder, int_encoder
def text_clean_df(merged_df, trans_df, pipeline=TOPIC_PIPELINE):
    df1 = text_df(merged_df, trans_df)
    col_processed = [pre.text_preprocessing(text, pipeline) for text in tqdm(df1['Convo_1'])]
    df1['Convo_1'] = cc.untokenize(col_processed)
    return df1
def createJsonRepresentation(app):
    unit = getUnit(app)
    root = {}
    allWits = []
    rdgs = [el for el in app.childNodes if el.nodeType == 1]
    for rdg in rdgs:
        appLevel = {}
        appLevel['id'] = rdg.getAttribute('wit')
        tokenList = []
        ws = rdg.getElementsByTagName('w')
        for ind, w in enumerate(ws):
            if not 3 in [child.nodeType for child in w.childNodes]:
                continue
            currentWord = w
            if ind == 0:
                previousWord = ''
            else:
                previousWord = ws[ind - 1]
            token = {}
            token['t'] = currentWord.toxml()[8 + len(w.getAttribute('n')):-4]
            token['n'] = Preprocessing.conflate(currentWord)
            token['u'] = unit
            tokenList.append(token)
        appLevel['tokens'] = tokenList
        allWits.append(appLevel)
    root['witnesses'] = allWits
    return json.loads(json.dumps(root))
def search(sentence):
    result = {}
    words = Preprocessing.Clean(sentence)
    for word in words:
        q = db.search(qr.Word == word)
        rating = []
        try:
            for i in range(len(q[0]['Count'])):
                rating.append(q[0]['Place'][i] - q[0]['Count'][i])
            documents = q[0]['Documents']
            rating, documents = (list(t) for t in zip(*sorted(zip(rating, documents))))
            result[word] = documents
        except (IndexError, KeyError):
            # the word is not in the index; skip it
            pass
    if sentence.startswith('"') and sentence.endswith('"'):
        res = ()
        if len(words) > 1 and result:
            for r in result:
                if res:
                    res = res.intersection(result[r])
                else:
                    res = set(result[r])
            return {'result': list(res)}
        else:
            return result
    else:
        return result
def test(X, cutline, columns):
    X.drop("scores", axis='columns', inplace=True)
    anomaly_path = "./CIC-output/normal-1.pcap_Flow.csv"
    anomaly_data = Preprocessing.load_df(anomaly_path)
    result = []
    c = 1
    for xi in range(len(anomaly_data)):
        nth_data = list()
        for x in list(anomaly_data.columns):
            nth_data.append(anomaly_data.loc[xi, x])
        X.loc[len(X)] = nth_data

        # LOF
        clf = LocalOutlierFactor(n_neighbors=2, contamination=0.1)
        y_pred = clf.fit_predict(X.values)
        X_scores = clf.negative_outlier_factor_
        X_scores = np.array(X_scores, dtype=np.float64)

        print("[{}] - {}".format(c, -X_scores[-1]))
        if -X_scores[-1] >= cutline:
            result.append(-1)  # outlier
        else:
            result.append(1)   # inlier
        c += 1
        print('=====')
        X = X.drop(X.index[len(X) - 1])

    print("\n\n")
    print("FileName : {}".format(anomaly_path))
    print(result)
    print("-1 : {}".format(result.count(-1)))
    print("1 : {}".format(result.count(1)))
    return ''
def run_OMR(inputPath, classifiersPath):
    image, useAugmented = Preprocessing.read_and_preprocess_image(inputPath)
    Processing = Pipeline.Augmented if useAugmented else Pipeline.Standard
    Classifier.load_classifiers(classifiersPath)

    image = Processing.remove_brace(image)
    lineImage, staffDim = Processing.extract_staff_lines(image)
    groups = Processing.split_bars(image, lineImage, staffDim)

    output = []
    for group in groups:
        components, sanitized, staffDim, lineImage, dotBoxes = Processing.segment_image(group)

        Classifier.assign_components(sanitized, components, staffDim)
        Processing.join_meters(components)
        Processing.bind_accidentals_to_following_notes(components)
        Processing.bind_dots_to_notes(components, dotBoxes)
        Processing.assign_note_tones(components, sanitized, lineImage, staffDim, group)

        output.append(Display.get_guido_notation(components))

    return output
def random_forest(self):
    # the body uses self.filePath and self.canvas, so this is an instance method
    modelPath = "RandomForest1.joblib"
    dataFile = pd.read_csv(self.filePath)

    # Preprocessing:
    f = Preprocessing("output.csv")
    f.preprocessing("filetest.csv")
    file_data = pd.read_csv("output.csv")
    file_data_test = file_data.drop(['RainTomorrow'], axis=1)

    rf = load(modelPath)
    result = rf.predict(file_data_test)
    df = pd.DataFrame(result)

    label = Label(self.canvas, text=df)
    label.config(font=("Helvetica", 17))
    label.place(x=850, y=150)
def getFeatureVector(image):
    _, allcontour = preprocessing.segmentCharactersUsingProjection(image, "contorBasedOrientation")
    histDirections = np.zeros(9)
    totalContourpixels = 0
    for i in range(len(allcontour)):
        totalContourpixels += len(allcontour[i])
        for j in range(len(allcontour[i]) - 1):
            xdiff = allcontour[i][j][0] - allcontour[i][j + 1][0]
            ydiff = allcontour[i][j][1] - allcontour[i][j + 1][1]
            if xdiff < 0 and ydiff < 0:
                histDirections[1] += 1
            elif xdiff == 0 and ydiff > 0:
                histDirections[2] += 1
            elif xdiff > 0 and ydiff < 0:
                histDirections[3] += 1
            elif xdiff > 0 and ydiff == 0:
                histDirections[4] += 1
            elif xdiff > 0 and ydiff > 0:
                histDirections[5] += 1
            elif xdiff == 0 and ydiff < 0:  # was a duplicate of the second branch and never reachable
                histDirections[6] += 1
            elif xdiff < 0 and ydiff > 0:
                histDirections[7] += 1
            elif xdiff < 0 and ydiff == 0:
                histDirections[8] += 1
    return (histDirections / totalContourpixels).tolist()
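# The if/elif ladder in getFeatureVector() above bins each contour step into one
# of 8 directions by the signs of (xdiff, ydiff). A compact, hypothetical
# equivalent as a lookup table, assuming the duplicated branch was meant to
# cover the (0, negative) case:
DIRECTION_BIN = {
    (-1, -1): 1, (0, 1): 2, (1, -1): 3, (1, 0): 4,
    (1, 1): 5, (0, -1): 6, (-1, 1): 7, (-1, 0): 8,
}

def direction_bin(xdiff, ydiff):
    """Map a contour step to its histogram bin; returns None for a zero step."""
    key = ((xdiff > 0) - (xdiff < 0), (ydiff > 0) - (ydiff < 0))
    return DIRECTION_BIN.get(key)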
def train(filename):
    fileTrain = xlrd.open_workbook(filename)
    dataTrain = fileTrain.sheet_by_index(0)
    rowLen = dataTrain.nrows
    filePreprocessed = openpyxl.Workbook()
    dataPreprocessed = filePreprocessed.active

    for i in range(0, rowLen):
        data_i = dataTrain.cell(i, 0).value
        class_i = dataTrain.cell(i, 1).value
        prep = Preprocessing.preprocess(data_i)
        # print(prep)
        if prep:
            # inner loop variable renamed so it no longer shadows the row index
            for j in range(0, len(prep)):
                dataPreprocessed.append([''.join(prep[j]), class_i])

    filePreprocessed.save("dataset_preprocessing.xlsx")
    # FeatureSelection.mutualInformation()
    # FeatureSelection.elimination()
    # NaiveBayes.classify(filename)
def predict(self, X, means, std_devs):
    X = Preprocessing.zscore_norm_prediction(X, means, std_devs)
    solution = self.forward_propagation(X)[-1]
    print('----- SOLUTION -----')
    print(solution.item((0, 0)))
def train_dimension_reduction(filename):
    pipe = Pipeline([('reduce_dim', TruncatedSVD()), ('classification', SVC())])
    param_grid = {'reduce_dim__n_components': [70, 75, 85, 100, 120],
                  'classification__kernel': ['poly', 'linear', 'rbf'],
                  'classification__C': [0.1, 0.5, 1, 2]}
    svm = GridSearchCV(pipe, param_grid, cv=10)
    features, labels, sparse_encoder, int_encoder = Preprocessing.feature_extraction_sparse_train(filename)
    svm.fit(features, labels)
    return svm, sparse_encoder, int_encoder
def train_sparse(filename):
    svm = SVC()
    features, labels, sparse_encoder, int_encoder = Preprocessing.feature_extraction_sparse_train(filename)
    param_grid = {'kernel': ['poly', 'linear', 'rbf'], 'C': [0.1, 0.5, 0.9, 1, 2]}
    best_svm = GridSearchCV(svm, param_grid, cv=10)
    best_svm.fit(features, labels)
    return best_svm, sparse_encoder, int_encoder
def preprocess(self, img_face):
    Size_For_Eye_Detection = (48, 48)
    # cv2.resize expects an interpolation flag, not PIL's Image.ANTIALIAS
    img_face = cv2.resize(img_face, Size_For_Eye_Detection, interpolation=cv2.INTER_AREA)
    img_norm = Preprocessing.LBH_Norm(img_face)
    # img_norm = Preprocessing.mask_on_rect(img_norm)
    return img_norm, img_face
def save_preprocessed_tweets(self):
    with open(self.tweets_not_processed_file_path, 'r', encoding='utf-8') as csv_r_file:
        csv_reader = csv.reader(csv_r_file)
        path = r"./data"
        self.tweets_processed_file_path = os.path.join(
            path,
            self.tweets_not_processed_file_name[:14] + self.tweets_not_processed_file_name[18:])
        # print(self.tweets_processed_file_path)
        with open(self.tweets_processed_file_path, 'w', encoding='utf-8', newline='') as csv_w_file:
            csv_writer = csv.writer(csv_w_file)
            csv_writer.writerow(['tweet_id', 'processed_tweet'])
            # skip headers
            next(csv_reader)
            for tweet in csv_reader:
                if len(tweet) == 2:
                    processed_tweet = preprocess.preprocess_tweets("".join(tweet[1]))
                    # print(processed_tweet)
                    t = " ".join(processed_tweet)
                    csv_writer.writerow([tweet[0], t])
    return self.tweets_processed_file_path
def createWordCloud(text):
    # removes STOPWORDS from the chart to make it more readable
    return WordCloud(stopwords=Preprocessing.stemText(
                         STOPWORDS | {'endofsen', 'endofpar', 'said', 'say', 'will'}),
                     background_color="white", width=500, height=500).generate(text)
def integrator():
    K = 5
    Preprocessing.main()
    Boostrap.main(Preprocessing.inputSet)
    # print(Boostrap.testSet)
    global prediction
    prediction = {}
    # bootstrap voting
    for i in range(Boostrap.NO_OF_BOOTSTRAPS):
        print("*** %d BootStrap ****" % (i))
        # prediction[i] = KNN.main(Boostrap.bootstrap[i], Boostrap.testSet, K)
        (inputToRectifier, probabilityChart, priors, featureAndValues,
         trainSetLength) = NaiveBayes.NaiveBayes(Boostrap.bootstrap[i])
        print("******** DECISION TREE ************")
        tree = DecisionTree.GenerateTreeFromDatasetGivenByAssorter(inputToRectifier)
        print("******** DECISION TREE ENDS ************")
        print("******** TEST NAIVE START ************")
        probabilityDistributionChart = NaiveBayes.GetProbabilityDistributionTable(
            probabilityChart, priors, featureAndValues, Boostrap.testSet, trainSetLength)
        print("******** TEST NAIVE ENDS ************")
        print("******** PREDICTION START ************")
        prediction[i] = DecisionTree.PredictedList(probabilityDistributionChart, tree, Boostrap.testSet)
        # print("***prediction***", prediction)
    print("*********** BootStrapVoting *************")
    boostrapVoting()
    calculateTestError()
import datetime, json, os, Preprocessing, sys, xml.dom.minidom as minidom

os.chdir(os.path.abspath(os.path.dirname(__file__)))
args = sys.argv
assert len(args) == 3, "Expected exactly 2 arguments! -i followed by input directory path"
assert '-i' in args and os.path.exists(args[args.index('-i')+1]), "Invalid input directory"

path = args[args.index('-i')+1]
xmls = filter(lambda x: str(x.split('.')[len(x.split('.'))-1]) == 'xml', os.listdir(path))
l = len(xmls)
count = 0
print
for afile in xmls:
    count += 1
    Preprocessing.updateProgressBar('XMLtoJSON.py', float(100)*count/l)
    unit = Preprocessing.parseName(afile)
    root = {}
    alldocs = []
    rdgs = [el for el in minidom.parse(os.path.join(path, afile)).getElementsByTagName('*')
            if el.localName in ['lem', 'rdg']]
    for rdg in rdgs:
        docLevel = {}
        docLevel['id'] = rdg.getAttribute('wit')
        tokenList = []
        ws = rdg.getElementsByTagName('w')
        words = []
        for w in range(len(ws)):
            if not 3 in [child.nodeType for child in ws[w].childNodes]:
                # checking presence of text nodes inside the w
                continue
            currentWord = ws[w]
            previousWord = ''
            if textNodeValue != '-':
                normalizedAttrValue = token[0]['n']
            else:
                textNodeValue = ''
                normalizedAttrValue = ''
            tokenElement.appendChild(doc.createTextNode(textNodeValue))
            tokenElement.setAttribute('n', normalizedAttrValue)
            tokenElement.setAttribute('u', unitValue)
            tokenElement.setAttribute('witness', nameToNumber[number])
            blockElement.appendChild(tokenElement)
            number += 1
        line.appendChild(blockElement)
    return pseudoPrettyPrint(normalChars(line.toprettyxml().encode('utf-8')))


if os.path.exists('output.xml'):
    os.remove('output.xml')

with codecs.open('output.xml', 'a') as out:
    out.write('<collationOutput>\n')
    for app in apps:
        c += 1
        Preprocessing.updateProgressBar('Collation', float(100)*c/l)
        collationResults = collate_pretokenized_json(createJsonRepresentation(app), 'json')
        out.write(processColumn(collationResults, getUnit(app)))
        if c % FLUSH == 0:
            Preprocessing.updateProgressBar('Collation', float(100)*c/l, True)
            gc.collect()
    out.write('</collationOutput>')

print '\nTook', datetime.datetime.now() - startTime, 'to execute.'
filenameTesting = "../../data/testing_48x48_aligned_large.p_R.csv.gz"
totTime = 0
count = 0
show = True
ok = 0
preds = 0

import matplotlib.pyplot as plt
plt.ion()
pos = np.arange(6) + .5

with gzip.open(filenameTesting) as f:
    reader = csv.reader(f)
    for row in reader:
        truePerson = int(row[0])
        vals = np.asarray(row[1:], np.float)
        start = time.time()
        preprocessed = Preprocessing.preprocess(vals, None, 46, show)
        res = pred.getPrediction(preprocessed / 255.)
        totTime += time.time() - start
        predPerson = int(res.argmax())
        plt.clf()
        plt.yticks(pos, ('Dejan', 'Diego', 'Martin', 'Oliver', 'Rebekka', 'Ruedi'))
        predPValue = res[0][res.argmax()]
        if (predPValue > 0.9):
            preds += 1
        if predPerson == truePerson:
            if (predPValue > 0.9):
                ok += 1
            col = 'g'
        else:
            col = 'r'
        plt.barh(pos, np.asarray(res[0], dtype=float), align='center', color=col)
import pandas as pd
import Preprocessing as prep

train_data = pd.read_csv('data/train.csv')
preprocessed_data = prep.preprocess(train_data)

print preprocessed_data.describe()
def evaluate_lenet5(topo, learning_rate=0.005, n_epochs=500, datasetName='mnist.pkl.gz', batch_size=4242, stateIn = None, stateOut = None): rng = numpy.random.RandomState(23455) theano_rng = RandomStreams(numpy.random.randint(2 ** 30)) #Original #datasets = load_data(dataset) #n_out = 10 datasets = Preprocessing.load_pictures() # pickle.dump(datasets, open( datasetName, "wb" ) ) #Attention y is wrong # print("Saveing the pickeled data-set") #Loading the pickled images #print("Loading the pickels data-set " + str(datasetName)) #datasets = pickle.load(open(datasetName, "r")) n_out = 6 batch_size = 10 print(" Learning rate " + str(learning_rate)) # Images for face recognition #train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[0] test_set_x, test_set_y = datasets[1] # compute number of minibatches for training, validation and testing n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_valid_batches /= batch_size n_test_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' print 'Number of Kernels' + str(topo.nkerns) in_2 = 14 #Input in second layer (layer1) # Reshape matrix of rasterized images of shape (batch_size,28*28) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 1, topo.ishape[0], topo.ishape[1])) # Using presistent state from last run w0 = w1 = b0 = b1 = wHidden = bHidden = wLogReg = bLogReg = None if stateIn is not None: print(" Loading previous state ...") state = pickle.load(open(stateIn, "r")) convValues = state.convValues w0 = convValues[0][0] b0 = convValues[0][1] w1 = convValues[1][0] b1 = convValues[1][1] hiddenVals = state.hiddenValues wHidden = hiddenVals[0] bHidden = hiddenVals[1] logRegValues = state.logRegValues wLogReg = logRegValues[0] bLogReg = logRegValues[1] print("Hallo Gallo") # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1,28-5+1)=(24,24) # maxpooling reduces this further to (24/2,24/2) = (12,12) # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 1, topo.ishape[0], topo.ishape[0]), filter_shape=(topo.nkerns[0], 1, topo.filter_1, topo.filter_1), poolsize=(topo.pool_1, topo.pool_1), wOld=w0, bOld=b0) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1,12-5+1)=(8,8) # maxpooling reduces this further to (8/2,8/2) = (4,4) # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, topo.nkerns[0], topo.in_2, topo.in_2), filter_shape=(topo.nkerns[1], topo.nkerns[0], topo.filter_2, topo.filter_2), poolsize=(topo.pool_2, topo.pool_2), wOld=w1, bOld=b1) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (20,32*4*4) = (20,512) layer2_input = layer1.output.flatten(2) # Evt. some drop out for the fully connected layer # Achtung p=1 entspricht keinem Dropout. 
# layer2_input = theano_rng.binomial(size=layer2_input.shape, n=1, p=1 - 0.02) * layer2_input # paper_6 no dropout # paper_14 again 0.02 dropout # paper_15 again no dropout layer2 = HiddenLayer(rng, input=layer2_input, n_in=topo.nkerns[1] * topo.hidden_input, n_out=topo.numLogisticInput, activation=T.tanh, Wold = wHidden, bOld = bHidden) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=topo.numLogisticInput, n_out=n_out, Wold = wLogReg, bOld=bLogReg ) # Some regularisation (not for the conv-Kernels) L2_sqr = (layer2.W ** 2).sum() + (layer3.W ** 2).sum() # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) + 0.001 * L2_sqr # paper7 # paper9 back to 0.001 again # paper10 no reg. # paper12 back to 0.001 again # create a function to compute the mistakes that are made by the model test_model = theano.function([index], layer3.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size]}) validate_model = theano.function([index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size]}) # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i],grads[i]) pairs. updates = [] for param_i, grad_i in zip(params, grads): updates.append((param_i, param_i - learning_rate * grad_i)) ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
start_time = time.clock() epoch = 0 done_looping = False epoch_fraction = 0.0 while (epoch < n_epochs) and (not done_looping): # New epoch the training set is disturbed again print(" Starting new training epoch") print(" Manipulating the training set") train_set_x, train_set_y = Preprocessing.giveMeNewTraining() n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size validation_frequency = min(n_train_batches, patience / 2) print(" Compiling new function") learning_rate *= 0.993 #See Paper from Cican train_model = theano.function([index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size]}) print(" Finished compiling the training set") epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): #Alle einmal anfassen iter = (epoch - 1) * n_train_batches + minibatch_index epoch_fraction += 1.0 / float(n_train_batches) if iter % 100 == 0: print 'training @ iter = ', iter, ' epoch_fraction ', epoch_fraction cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) # test it on the test set test_start = time.clock(); test_losses = [test_model(i) for i in xrange(n_test_batches)] train_costs = [train_model(i) for i in xrange(n_test_batches)] dt = time.clock() - test_start print'Testing %i faces in %f msec image / sec %f', batch_size * n_test_batches, dt, dt/(n_test_batches * batch_size) test_score = numpy.mean(test_losses) train_cost = numpy.mean(train_costs) print('%i, %f, %f, %f, %f, 0.424242' % (epoch, this_validation_loss * 100.,test_score * 100., learning_rate, train_cost)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # # test it on the test set # test_losses = [test_model(i) for i in xrange(n_test_batches)] # test_score = numpy.mean(test_losses) # print((' epoch %i, minibatch %i/%i, test error of best ' # 'model %f %%') % # (epoch, minibatch_index + 1, n_train_batches, # test_score * 100.)) # if (this_validation_loss < 0.02): # learning_rate /= 2 # print("Decreased learning rate due to low xval error to " + str(learning_rate)) if patience <= iter: print("--------- Finished Looping ----- earlier ") done_looping = True break end_time = time.clock() print('---------- Optimization complete -------------------------') print('Res: ', str(topo.nkerns)) print('Res: ', learning_rate) print('Res: Best validation score of %f %% obtained at iteration %i,' \ 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print('Res: The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) # Oliver if not os.path.isdir("conv_images"): os.makedirs("conv_images") os.chdir("conv_images") # d = layer0.W.get_value() #e.g. 
(20, 1, 5, 5) number of filter, num of incomming filters, dim filter # for i in range(0, numpy.shape(d)[0]): # dd = d[i][0] # rescaled = (255.0 / dd.max() * (dd - dd.min())).astype(numpy.uint8) # img = Image.fromarray(rescaled) # img.save('filter_l0' + str(i) + '.png') # # d = layer1.W.get_value() #e.g. (20, 1, 5, 5) number of filter, num of incomming filters, dim filter # for i in range(0, numpy.shape(d)[0]): # dd = d[i][0] # rescaled = (255.0 / dd.max() * (dd - dd.min())).astype(numpy.uint8) # img = Image.fromarray(rescaled) # img.save('filter_l1' + str(i) + '.png') state = LeNet5State(topology=topo, convValues = [layer0.getParametersAsValues(), layer1.getParametersAsValues()], hiddenValues = layer2.getParametersAsValues(), logRegValues = layer3.getParametersAsValues()) print if stateOut is not None: pickle.dump(state, open(stateOut, 'wb') ) #Attention y is wrong print("Saved the pickeled data-set") return learning_rate
assert len(args) == 3, "Expected exactly 2 arguments!\n\n-i followed by input directory path"
assert '-i' in args and os.path.exists(args[args.index('-i')+1]), "Invalid input directory"


def normalChars(l):
    # undo the XML escaping introduced by minidom
    return l.replace('&lt;', '<').replace('&gt;', '>').replace('&quot;', '"')


path = args[args.index('-i')+1]
jsons = filter(lambda x: str(x.split('.')[len(x.split('.'))-1]) == 'json', os.listdir(path))
os.chdir(path)
c = 0
l = len(jsons)
couldnt = []
print
for afile in jsons:
    c += 1
    Preprocessing.updateProgressBar('JSONtoXML.py', float(100)*c/l)
    data = json.loads(open(afile, 'r').read())
    nameToNumber = {number: name for number, name in enumerate(data['witnesses'])}
    with codecs.open(afile[:-4] + 'xml', 'w') as out:
        doc = minidom.Document()
        witnessElement = doc.createElement('witnesses')
        doc.appendChild(witnessElement)
        blockc = 0
        for block in data['table']:
            blockc += 1
            blockElement = doc.createElement('block')
            blockElement.setAttributeNode(doc.createAttribute('n'))
            blockElement.setAttribute('n', str(blockc-1))
            number = 0
            for token in block:
                tokenElement = doc.createElement('token')
def classify(self, article, dataModel):
    "Classify an article using the Bayes model that has been built"
    label = []
    # preprocessing for the input data
    token = Preprocessing.preprocess(article)
    # get the labels from the xls model
    for i in range(1, dataModel.ncols):
        label.append(dataModel.cell(0, i).value)

    if token:
        probability = []
        # look up the per-label probability of each token
        i = 0
        idx = 0
        while i < len(token):
            probability.append([])
            j = 2
            while j < dataModel.nrows:
                if token[i] == dataModel.cell(j, 0).value:
                    for k in range(0, len(label)):
                        pd = dataModel.cell(j, k + 1).value
                        probability[idx].append(pd)
                    break
                j += 1
            if j == dataModel.nrows:
                # token not found in the model
                del probability[-1]
                i += 1
            else:
                i += 1
                idx += 1

        # probability calculation
        pFinal = []
        for i in range(0, len(label)):
            pc = dataModel.cell(1, i + 1).value
            valP = 1
            for j in range(0, len(probability)):
                valP *= probability[j][i]
            if len(probability) == 1:
                if valP != 1:
                    value = valP
                else:
                    value = 0
            else:
                if valP != 1:
                    value = valP * pc
                else:
                    value = 0
            pFinal.append(value)

        maks = max(pFinal)
        if maks != 0:
            for i in range(0, len(pFinal)):
                if pFinal[i] == maks:
                    idxMax = i
            decision = dataModel.cell(0, idxMax + 1).value
        else:
            decision = 'ERROR'
    else:
        decision = 'ERROR'
    return decision
def runEpisode(ale, agent, stepsRemaining, currentEpisodeTask, frameSkip, maxNoActions):
    maxEpisodeDuration = 60 * 60 * 5  # Max game duration is 5 minutes, at 60 fps
    framesElapsed = 0
    totalEpisodeReward = 0
    ale_game_over = False

    width, height = ale.getScreenDims()
    screenBuffer = np.zeros((2, height, width), dtype=np.uint8)
    screenBufferIndex = 0

    frameSkipCounter = 0
    rewardPool = 0
    reward = 0
    startingLives = -1

    if maxNoActions > 0:
        numNoActionsToTake = np.random.randint(0, maxNoActions)
        for x in xrange(numNoActionsToTake):
            ale.act(0)

    screenObservation = ale.getScreenRGB()
    grayScreenObservation = Preprocessing.grayScaleALEObservation(screenObservation)
    screenBuffer[screenBufferIndex] = grayScreenObservation
    screenBufferIndex = (screenBufferIndex + 1) % 2
    preprocessedObservation = Preprocessing.resizeALEObservation(grayScreenObservation, agent.inputHeight, agent.inputWidth)
    action = agent.startEpisode(preprocessedObservation, currentEpisodeTask)
    startingLives = ale.lives()

    while not ale_game_over and framesElapsed < stepsRemaining and framesElapsed < maxEpisodeDuration:
        framesElapsed += 1

        frameSkipCounter = 0
        while frameSkipCounter < frameSkip:
            rewardPool += ale.act(action)
            # if not ale.game_over() and startingLives == -1:
            #     startingLives = ale.lives()
            screenObservation = ale.getScreenRGB()
            grayScreenObservation = Preprocessing.grayScaleALEObservation(screenObservation)
            screenBuffer[screenBufferIndex] = grayScreenObservation
            screenBufferIndex = (screenBufferIndex + 1) % 2
            frameSkipCounter += 1
            if ale.game_over() or (agent.training == True and agent.deathEndsEpisode and ale.lives() != startingLives):
                ale_game_over = True
                break

        reward = rewardPool
        rewardPool = 0
        totalEpisodeReward += reward

        # if not ale.game_over() and startingLives == -1:
        #     startingLives = ale.lives()
        # if ale.game_over() or (agent.deathEndsEpisode and ale.lives() != startingLives):
        #     ale_game_over = True

        maxImage = np.maximum(screenBuffer[screenBufferIndex, ...], screenBuffer[screenBufferIndex - 1, ...])
        preprocessedObservation = Preprocessing.resizeALEObservation(maxImage, agent.inputHeight, agent.inputWidth)
        action = agent.stepEpisode(reward, preprocessedObservation)

    ale.reset_game()
    avgLoss = agent.endEpisode(reward)
    return framesElapsed, totalEpisodeReward, avgLoss
assert len(args) == 5, "Expected 4 arguments!\n\n-i followed by input directory path\n-o followed by output file path"
assert '-i' in args and os.path.exists(args[args.index('-i')+1]), "Invalid input directory"
assert '-o' in args and not args.index('-o') == len(args)-1, "No output file path provided"

path = args[args.index('-i')+1]
jsonFileName = os.path.join(os.getcwd(), args[args.index('-o')+1])
xmls = filter(lambda x: str(x.split('.')[len(x.split('.'))-1]) == 'xml', os.listdir(path))
root = {}
alldocs = []
l = len(xmls)
count = 0
for afile in xmls:
    count += 1
    print 'XMLsToJSON.py: Processing', afile, 'file', count, 'out of', l
    unit = Preprocessing.parseName(afile)
    docLevel = {}
    docLevel['id'] = afile
    tokenList = []
    if debug:
        html.write('<h2>' + afile + '</h2><table border = "1"><th>Original<th>Conflated</th>')
    ws = minidom.parse(os.path.join(path, afile)).getElementsByTagName('w')
    words = []
    for w in range(len(ws)):
        if not 3 in [child.nodeType for child in ws[w].childNodes]:
            # checking presence of text nodes inside the w
            continue
        currentWord = ws[w]
        previousWord = ''
        try:
            previousWord = ws[w-1]
        except IndexError:
    return previous_row[-1]


def isBlank(node):
    return node.getAttribute('n') == ''


os.chdir(path)
if os.path.exists('Postprocessed'):
    shutil.rmtree('Postprocessed')
os.mkdir('Postprocessed')
print
for afile in xmls:
    c += 1
    Preprocessing.updateProgressBar('Postprocessing.py', float(100)*c/x)
    doc = minidom.parse(os.path.join(path, afile))
    blocks = doc.getElementsByTagName('block')
    tokens = doc.getElementsByTagName('token')
    blanks = [token for token in tokens if token.getAttribute('n') == '']
    if blanks:
        # generate dictionary of witness to its token nodes for each row
        column1Toks = blocks[0].getElementsByTagName('token')
        wit2toks = {}
        for token in column1Toks:
            wit = token.getAttribute('witness')
            row = [token for token in doc.getElementsByTagName('token')
                   if token.nodeType == 1 and token.getAttribute('witness') == wit]
            wit2toks[wit] = row
        for (wit, row) in wit2toks.items():
            # generate list of lists of sequences of empty tokens
            fin = []