Beispiel #1
0
def start(paths, num=30, method='destroy', dirs=False):
    """Wipe the given paths with the selected method.

    paths  -- iterable of filesystem paths to collect and process
    num    -- number of shred passes handed to the Cleaner
    method -- 'destroy' (shred), 'zeroing' (zero-fill); any other value
              falls back to plain deletion
    dirs   -- when True, also delete the collected directories
    """
    data = smart.DataObj()
    wiper = cleaner.Cleaner(shreds=num)
    for path in paths:
        data.add_path(path)
    # Resolve the per-file action once; unknown methods degrade to deletion.
    actions = {'destroy': wiper.shred_file, 'zeroing': wiper.zero_file}
    action = actions.get(method, wiper.del_file)
    for target in data.get_files():
        smart.smart_print()
        print(f'[{method}] File: {target}')
        print_status(action(target))
    smart.smart_print()
    if dirs:
        for folder in data.get_dirs():
            print(f'Delete folder: {folder}')
            print_status(wiper.del_dir(folder))
    if wiper.errors:
        smart.smart_print(f' Errors: [{len(wiper.errors)}]')
        for err in wiper.errors:
            print(err)
 def __init__(self, scriptfile, testfile, options):
     """Validate the input files, then run the clean-and-merge pipeline.

     scriptfile -- path to the script to merge (checked via misc.isFileExist)
     testfile   -- path to the test file to clean (checked the same way)
     options    -- options object forwarded to the Merger
     """
     self.options = options
     # misc.isFileExist presumably aborts or raises when the path is
     # missing -- TODO confirm against the misc module.
     misc.isFileExist(scriptfile)
     self.scriptfile = scriptfile
     misc.isFileExist(testfile)
     self.testfile = testfile
     # NOTE(review): self.tempdir is never assigned in this method; it must
     # be provided elsewhere on the class -- confirm before relying on it.
     cleaner.Cleaner(self.testfile, self.tempdir).clean()
     merger.Merger(self.scriptfile, self.tempdir, self.options).merge()
def plushkin(path, delete):
    """Report duplicate files found under *path*.

    When *delete* is truthy, the user is prompted group by group for the
    copy to keep, each group is cleaned, and an overall summary is shown.
    Raises DirectoryNotFoundException when *path* is not a directory.
    """
    sys.excepthook = excepthook
    # Guard clause: reject a bad path up front.
    if not os.path.isdir(path):
        raise DirectoryNotFoundException('Path not found')
    clones = scr.Searcher.search_clones(path)  # FM
    report_ui = ui.UserInterface(clones)
    report_ui.show_search_report()
    if delete:
        for idx in range(report_ui.clone_groups_len):
            group, keep_idx = report_ui.show_cleaning_input(idx)
            outcome = clr.Cleaner(group, keep_idx).clean_and_report()
            report_ui.report(outcome)
        report_ui.overall()
Beispiel #4
0
def main():
    """Feed the test CSV to Cleaner.get_batch_num.

    Fixes a resource leak: the original opened TEST_SOURCE_CSV and never
    closed it. The context manager guarantees the handle is released even
    if get_batch_num raises.
    """
    batch_reader = cleaner.Cleaner()
    # The explicit 4096 preserves the original read buffering.
    with open(TEST_SOURCE_CSV, 'r', 4096) as source:
        batch_reader.get_batch_num(source)
Beispiel #5
0
# Show which dictionary file is about to be loaded (Python 2 print statement).
print dname

verbose = True
inverse_topic_word_map = {}  # inverse topic map: word -> [(topic_id, probability), ...]
unsaved_docs = 0
inverse_hashmap_word_lookup_length = 100

# Load the persisted gensim-style dictionary and LDA model from disk.
print "loading dictionary and model"
dictionary = corpora.Dictionary.load(dname)
model = models.LdaModel.load(mname)

print "done loading model and dictionary"

# Create the cleaner instance.
# NOTE(review): this rebinding shadows the imported `cleaner` module -- the
# module itself is unreachable under that name from here on.
cleaner = cleaner.Cleaner()

def create_inverse_hashmap(number_of_topics):
	"""Populate the module-level inverse_topic_word_map.

	For each topic id in range(number_of_topics), normalize the model's
	lambda row into a word-probability distribution, then record a
	(topic_id, probability) entry under every word, so topics can be
	looked up by word rather than by id.

	Fixes in this revision:
	- consistent tab indentation (the original mixed tabs and spaces,
	  which is fragile and a syntax error under Python 3);
	- the comprehension no longer shadows the builtin `id`;
	- the redundant int() cast of the range() value is removed.
	"""
	for topic_id in range(number_of_topics):
		# Normalize the raw topic-word weights into probabilities.
		topic = model.state.get_lambda()[topic_id]
		topic = topic / topic.sum()
		word_distribution = [(dictionary[word_id], topic[word_id])
		                     for word_id in range(len(dictionary))]
		for word, probability in word_distribution:
			# setdefault replaces the explicit membership test of the
			# original while keeping the same resulting lists.
			inverse_topic_word_map.setdefault(word, []).append(
				(topic_id, probability))
Beispiel #6
0
# Run cleaning on all the data, and place everything in one combined file

import cleaner

# Parameters
outfile = 'combined_data.csv'

input_suffix = '_course_evaluation.xls'
# One reader per file-format era; order matches the `names` list below.
readers = [
    cleaner.CleanerFa05(),
    cleaner.CleanerSp05(),
    cleaner.Cleaner06(),
    cleaner.Cleaner()
]

fa05_names = ['fa05']
sp05_names = ['sp05']

# Fall/spring 2006 terms.
y06_names = [t + '06' for t in ['fa', 'sp']]

# Modern terms: fa07-fa16, sp07-sp16, su14-su17 (zero-padded two-digit years).
modern_names = ['fa' + format(i, '02') for i in range(7, 17)] + \
    ['sp' + format(i, '02') for i in range(7, 17)] + \
    ['su' + format(i, '02') for i in range(14, 18)]

names = [fa05_names, sp05_names, y06_names, modern_names]

# Clean all the data and output the file
# The first term is peeled off -- presumably processed separately to create
# the combined file before the rest are appended. TODO confirm: the loop that
# consumes `readers`/`names` is not visible in this chunk.
first_year = names[0][0]
names[0] = names[0][1:]

print('')
Beispiel #7
0
 def __init__(self, args):
     """Store the CLI args, then run the clean step followed by the merge step."""
     self.args = args
     # NOTE(review): self.tempdir is never assigned in this method; it must
     # be provided elsewhere on the class -- confirm before relying on it.
     cleaner.Cleaner(self.args, self.tempdir).clean()
     merger.Merger(self.args, self.tempdir).merge()
Beispiel #8
0
import parser
import translator
import cleaner
import sys
import os
import re

# NOTE(review): this script is truncated in the visible chunk -- the second
# pass continues past the last line shown here.
inFile = sys.argv[1]                               # assembly source path from the command line
outFile = inFile.split('.')[0] + '.hack'            # output: same base name, .hack extension

# NOTE(review): both rebindings below shadow the imported `cleaner` and
# `translator` modules; the modules are unreachable under those names after.
cleaner = cleaner.Cleaner()                        # making cleaner object
translator = translator.Translator()                # translator object creation

try:
    # NOTE(review): neither handle is closed in the visible code; consider
    # `with open(...)` -- confirm against the full script.
    fin = open(inFile,'r')
    fout = open(outFile,'w')                        # write-mode output file
    lineCtr = -1
    for line in fin:      # PASS 1: record label addresses
        # NOTE(review): cleaner.clean(line) is invoked up to three times per
        # line here; the result could be computed once and reused.
        if (cleaner.clean(line) == None or  cleaner.clean(line)):
            if cleaner.clean(line) == None:
                # A label does not occupy an instruction slot: register it at
                # the next instruction address, then cancel the increment below.
                cleaner.addLabel(line[1:(len(line)-2)],lineCtr + 1)
                lineCtr -= 1
            lineCtr += 1

    fin.seek(0)
    for line in fin:              # PASS 2: translate each instruction
        if cleaner.clean(line):                     # cleaner returns empty string for full-line comments / blank lines
            line = cleaner.clean(line)
            #print(line)
            parsed = parser.Parser(line)            # parse the line
            iType = parsed.type()                  # get the instruction type
plt.plot(pitch_distribution_tonicRef.bins, pitch_distribution_tonicRef.vals)
plt.title('Pitch distribution')
plt.show()'''

# Detect peak locations in the pitch distribution, then map the peak bin
# indices back to their bin values (interval centers).
# NOTE(review): the [] assignment is immediately overwritten below.
intervals = []
intervals = peakdet.peakLocationDetection(pitch_distribution_tonicRef.vals)
for v in range(len(intervals)):
    intervals[v] = pitch_distribution_tonicRef.bins[intervals[v]]
# Quantize the pitch series to the detected intervals relative to the tonic.
pitchSeriesHz = histquantizer.Histquantizer(tonic_hz, CENTS_IN_OCTAVE,
                                            intervals, pitchSeriesHz)
'''
plt.plot(timeSeries,pitchSeriesHz)
plt.title('pitchSeriesHz_quantized')
plt.show()
'''
# Clean the quantized series.
# NOTE(review): cleaner.Cleaner(history, ...) is used as a callable returning
# a series here -- confirm against the cleaner module.
pitchSeriesHz = cleaner.Cleaner(history, pitchSeriesHz)
'''
#plt.figure()
plt.plot(timeSeries,pitchSeriesHz)
plt.title('pitchSeriesHz_quantized')
plt.show()
'''
'''plt.figure()
plt.plot(timeSeries,pitchSeriesHz)
plt.title('pitchSeriesHz_quantized_cleaned')
plt.show()'''
# Stack time and pitch as two columns (rows after the transpose) for export.
data = np.array([timeSeries, pitchSeriesHz])
data = data.T
# NOTE(review): truncated here -- the CSV-writing logic continues past this chunk.
with open(dataFile, 'w+') as datafile_id:
    #writer=csv.writer(datafile_id, delimiter='\t')
    #writer.writerows(zip(timeSeries,pitchSeriesHz))