Esempio n. 1
0
def getQuotes():
  """Return the cleaned quotes, using ``quotes.pkl`` as an on-disk cache.

  If ``quotes.pkl`` does not exist, the quotes are produced by
  ``generateQuotes()``, passed through ``cleanUp()`` and pickled to that
  file. Otherwise the pickled quotes are loaded and passed through
  ``cleanUp()`` again before being returned.

  Returns:
    Whatever ``cleanUp()`` returns for the generated or cached quotes.
  """
  cache_path = 'quotes.pkl'

  if not os.path.exists(cache_path):
    quotes = cleanUp(generateQuotes())
    # Pickle data is bytes: the file must be opened in binary mode, otherwise
    # pickle.dump raises TypeError on Python 3.
    with open(cache_path, 'wb') as f:
      pickle.dump(quotes, f)
  else:
    # NOTE(review): pickle.load executes arbitrary code from the file, so this
    # is only safe while quotes.pkl is written exclusively by this function.
    with open(cache_path, 'rb') as f:
      quotes = pickle.load(f)
    # The cached data is cleaned again on load, as in the original code.
    quotes = cleanUp(quotes)

  return quotes
Esempio n. 2
0
def createSamples(text, n=1):
    """Write ``n`` sample files built from ``text``.

    For each index ``i`` in ``range(n)`` a random integer between 1 and 5000
    is drawn, appended after every ``$`` in ``text``, the result is passed
    through ``cleanUp()`` and written to ``path + 'sample<i>.txt'`` (``path``
    is a module-level name defined outside this function).

    Every sample is derived from the original ``text``. Previously the
    substituted text was fed back into the next iteration, so with ``n > 1``
    each ``$`` accumulated the amounts of all earlier samples, and the output
    files were never closed.

    Args:
        text: template text containing ``$`` placeholders.
        n: number of sample files to create (default 1).
    """
    for i in range(n):
        amt = random.randint(1, 5000)
        # Work on a per-sample copy so the template itself is never modified.
        sample = cleanUp(text.replace('$', '$' + str(amt)))
        with open(path + 'sample' + str(i) + '.txt', 'w+') as sample_file:
            sample_file.write(sample)
Esempio n. 3
0
def commitCommand():
    '''(None) -> None
    Checks if localRepoPath is a valid path. If it is, syncs the local repository,
    copies all the extracted XMLs from Changed_Systems into the local repository,
    and pushes changes to the remote. Changes the last update date to current date.

    While cleanUp.cleanUp() runs, standard output is redirected to
    cleanUpReport.txt (the file is truncated on every run). The real standard
    output is always restored and the report file is always closed, even if
    the clean up script fails.
    '''
    localRepoPath = repoTools.getLocalRepo()
    if not os.path.isdir(localRepoPath):
        # path is not a directory: report it and do nothing else
        print(
            "Problem in path. Please set the path of local repository using 'repo' command"
        )
        return

    # pull from remote to make sure local is up to date
    git.pull_repo(localRepoPath)

    # print clean up report in a text file; mode 'w' already empties an
    # existing report, so no separate truncation step is needed
    real_stdout = sys.stdout  # keep a handle on the real standard output
    with open('cleanUpReport.txt', 'w', encoding='utf-8') as report:
        sys.stdout = report
        try:
            cleanUp.cleanUp()
        except Exception:
            # stdout is still redirected here, so this message lands in the
            # report file (same destination as before)
            print("Fail to run clean up script")
        finally:
            sys.stdout = real_stdout

    # go through Changed_Systems and copy files to local repo
    source_dir = os.path.join(os.getcwd(), "Changed_Systems")
    print("Copying files...")
    for filename in glob.glob(os.path.join(source_dir, '*.*')):
        try:
            shutil.copy(filename, localRepoPath)
        except OSError:
            # a failed copy is reported but does not abort the remaining files
            print("Couldn't copy " + filename + " to " + localRepoPath +
                  ". Please close all files and try again")

    # push changes from local to remote
    git.push_all(localRepoPath)

    # change updated date
    today = str(datetime.date.today())
    fname = "last_commit_date.txt"
    with open(fname, 'w') as f:
        f.write(today)
    print("Changes successfully made. Last updated date is now " + today)
def inputSound():
    """Record from the microphone and return the list of detected notes.

    Opens a 16-bit mono 44100 Hz PyAudio input stream on device index 1,
    reads 1024-frame chunks until ``cleanup.ended(notesSung)`` is true,
    and appends ``cleanup.getNote(freq, cleanup.notes)`` for each chunk's
    detected pitch. The list is then passed to ``cleanup.cleanUp`` and
    ``cleanup.removeRepeats`` (their return values are ignored, so they
    presumably edit the list in place -- confirm in ``cleanup``).

    Side effects: sets the module globals ``recording`` (True while
    listening, False afterwards) and ``notesSung`` (the returned list).
    The stream and the PyAudio instance are released even if reading or
    pitch detection raises.

    Returns:
        The global ``notesSung`` list.
    """
    global recording
    global notesSung

    # Initialize PyAudio
    pyaud = pyaudio.PyAudio()
    recording = True
    try:
        # Open input stream, 16-bit mono at 44100 Hz.
        # The device index is machine specific; 1 is what this code uses,
        # your number may differ.
        stream = pyaud.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=44100,
            input_device_index=1,
            input=True)
        notes = cleanup.notes
        notesSung = []
        print("*listening*")
        try:
            while not cleanup.ended(notesSung):
                # Read raw microphone data
                rawsamps = stream.read(1024)
                # Convert raw data to NumPy array
                # NOTE(review): newer NumPy deprecates fromstring on raw
                # bytes in favour of frombuffer; left unchanged here.
                samps = numpy.fromstring(rawsamps, dtype=numpy.int16)
                freq = analyse.musical_detect_pitch(samps)
                notesSung.append(cleanup.getNote(freq, notes))
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        pyaud.terminate()
        recording = False

    print("*Done*")
    cleanup.cleanUp(notesSung)
    cleanup.removeRepeats(notesSung)
    return notesSung
def inputSound():
    """Record from the microphone and print the detected notes (debug variant).

    Opens a 16-bit mono 44100 Hz PyAudio input stream on device index 1 and
    keeps reading 1024-frame chunks while ``cleanup.silentEnd(notesSung)``
    is true. For every chunk the elapsed time is recorded and
    ``cleanup.getNote(freq, cleanup.notes)`` for the detected pitch is
    appended to ``notesSung``.

    Afterwards it prints the timestamps, the notes after
    ``cleanup.cleanUp``, a line of counts (timestamps, notes before
    clean up, notes after clean up, loop iterations), two blank lines and
    the notes after ``cleanup.removeRepeats``. Nothing is returned.

    The stream and the PyAudio instance are released even if reading or
    pitch detection raises. The print calls are written so that they
    produce the same output on Python 2 and Python 3.
    """
    # Initialize PyAudio
    pyaud = pyaudio.PyAudio()
    try:
        # Open input stream, 16-bit mono at 44100 Hz.
        # The device index is machine specific; 1 is what this code uses,
        # your number may differ.
        stream = pyaud.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=44100,
            input_device_index=1,
            input=True)
        times = []
        notes = cleanup.notes
        notesSung = []
        print("*listening*")
        start = time.time()
        fun = []  # one "*" per loop iteration; only its length is printed
        try:
            while cleanup.silentEnd(notesSung):
                times.append(time.time() - start)
                fun.append("*")
                # Read raw microphone data
                rawsamps = stream.read(1024)
                # Convert raw data to NumPy array
                samps = numpy.fromstring(rawsamps, dtype=numpy.int16)
                freq = analyse.musical_detect_pitch(samps)
                notesSung.append(cleanup.getNote(freq, notes))
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        pyaud.terminate()

    print("*Done*")
    print(times)
    leng = len(notesSung)  # note count before clean up
    cleanup.cleanUp(notesSung)
    print(notesSung)
    print("%d %d %d %d" % (len(times), leng, len(notesSung), len(fun)))
    print("")
    print("")
    cleanup.removeRepeats(notesSung)
    print(notesSung)
Esempio n. 6
0
def inputSound():
    """Listen on the microphone and print diagnostics about the notes heard.

    A PyAudio input stream (16-bit, mono, 44100 Hz, device index 1) is read
    in 1024-frame chunks for as long as ``cleanup.silentEnd(notesSung)``
    returns a true value. Each chunk contributes one elapsed-time entry to
    ``times`` and one ``cleanup.getNote(...)`` result to ``notesSung``.

    Printed, in order: the timestamps; the notes after ``cleanup.cleanUp``;
    the four counts ``len(times)``, note count before clean up, note count
    after clean up and number of loop iterations; two blank lines; the
    notes after ``cleanup.removeRepeats``. The function returns ``None``.

    The audio stream is stopped and closed and PyAudio is terminated in
    ``finally`` blocks, so the device is released on errors too. Output is
    identical under Python 2 and Python 3.
    """
    # Initialize PyAudio
    pyaud = pyaudio.PyAudio()
    try:
        # Open input stream, 16-bit mono at 44100 Hz.
        # Device index 1 is specific to the author's machine; adjust as needed.
        stream = pyaud.open(format=pyaudio.paInt16,
                            channels=1,
                            rate=44100,
                            input_device_index=1,
                            input=True)
        try:
            times = []
            notes = cleanup.notes
            notesSung = []
            print("*listening*")
            start = time.time()
            fun = []  # iteration marker list; only len(fun) is reported
            while cleanup.silentEnd(notesSung):
                times.append(time.time() - start)
                fun.append("*")
                # Read raw microphone data
                rawsamps = stream.read(1024)
                # Convert raw data to NumPy array
                samps = numpy.fromstring(rawsamps, dtype=numpy.int16)
                freq = analyse.musical_detect_pitch(samps)
                notesSung.append(cleanup.getNote(freq, notes))
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        pyaud.terminate()

    print("*Done*")
    print(times)
    leng = len(notesSung)  # size before cleanup.cleanUp runs
    cleanup.cleanUp(notesSung)
    print(notesSung)
    print("%d %d %d %d" % (len(times), leng, len(notesSung), len(fun)))
    print("")
    print("")
    cleanup.removeRepeats(notesSung)
    print(notesSung)
Esempio n. 7
0
def _text_stem(file_name):
    """Return the part of ``file_name`` before its first dot."""
    return file_name.partition('.')[0]


def _ocr_pdf_to_text(pdf_name, unread_path, save_path):
    """OCR every page of one PDF and append the text to ``<stem>.txt``.

    Each page is saved as ``<stem>_page<N>.jpg`` in ``unread_path`` (the
    caller removes these afterwards), read back with pytesseract, passed
    through ``cleanUp()`` and, when non-empty, appended to the text file in
    ``save_path``. Errors propagate to the caller.
    """
    stem = _text_stem(pdf_name)
    pages = convert_from_path(os.path.join(unread_path, pdf_name))

    image_paths = []
    for page_number, page in enumerate(pages, start=1):
        image_path = os.path.join(
            unread_path, stem + "_page" + str(page_number) + '.jpg')
        page.save(image_path, 'JPEG')
        image_paths.append(image_path)

    with open(os.path.join(save_path, stem + '.txt'), 'a') as text_file:
        # Iterate over every saved page, including the last one.
        for image_path in image_paths:
            with Image.open(image_path) as image:
                text = cleanUp(str(pytesseract.image_to_string(image)))
            # A blank page is simply skipped; the file stays open for the
            # remaining pages.
            if text:
                text_file.write(text)


def readFilesToText():
    """Convert the documents in ./data/Docs and ./data/Templates to text.

    Steps:
      1. Every file in ./data/Docs that is not already present in
         ./data/Read is parsed with ``parser.from_file``. Non-empty cleaned
         text is written to ./data/Docs_txt/<stem>.txt and the source is
         copied to ./data/Read. Files whose cleaned text is empty are
         skipped. Files for which the parser returns no content are copied
         to ./data/Unreadable.
      2. If nothing ended up unreadable the function returns here.
      3. Otherwise every ``.pdf`` in ./data/Unreadable is rendered to JPEG
         pages and OCR'd; on success the PDF is moved to ./data/Read. The
         temporary ``.jpg`` files in ./data/Unreadable are then deleted.
      4. Every file in ./data/Templates is parsed and, when it has content,
         written to ./data/Templates_txt/<stem>.txt and copied to
         ./data/Read.

    Fixes compared with the previous version: the PDF count no longer starts
    at 1, no file handle is closed before it was opened, the last page of
    each PDF is OCR'd, a blank OCR page no longer aborts the whole PDF, and
    all output files are closed.
    """
    print('Accessing files from data/Docs...')
    directory = './data/Docs'
    read_path = './data/Read'
    unread_path = './data/Unreadable'
    save_path = './data/Docs_txt'
    unableToConvert = []
    countpdf = 0
    # List the already-processed files once instead of once per document.
    already_read = set(os.listdir(read_path))

    for file_name in os.listdir(directory):
        if file_name in already_read:
            continue
        if file_name.endswith('.pdf'):
            countpdf += 1

        source = os.path.join(directory, file_name)
        text = parser.from_file(source)['content']
        if text is None:
            # No extractable text (e.g. a scanned image PDF): set the file
            # aside so it can be converted to images and OCR'd below.
            print('Apache Tika returned None for file: ' + str(file_name))
            shutil.copyfile(source, os.path.join(unread_path, file_name))
            unableToConvert.append(file_name)
            continue

        text = cleanUp(text)
        if not text:
            # Nothing left after clean up: no text file, not marked as read.
            continue

        txt_path = os.path.join(save_path, _text_stem(file_name) + '.txt')
        with open(txt_path, 'w') as text_file:
            text_file.write(text)
        # copying to read folder
        shutil.copyfile(source, os.path.join(read_path, file_name))

    print('\n\n The total number of PDFs were: ', countpdf)
    if not unableToConvert:
        print('All files converted to .txt')
        # NOTE(review): returning here also skips the template conversion
        # at the end of this function -- confirm that this is intended.
        return

    print(
        str(len(unableToConvert)) +
        ' files were not converted to text. These files are: ' +
        str(unableToConvert))

    print('Converting the files to JPEG and then trying OCR...')

    for file_name in os.listdir(unread_path):
        if not file_name.endswith('.pdf'):
            continue
        try:
            _ocr_pdf_to_text(file_name, unread_path, save_path)
            # moving to read folder
            shutil.move(os.path.join(unread_path, file_name),
                        os.path.join(read_path, file_name))
        except (ValueError, PDFPageCountError):
            print('The pdf ' + file_name + ' could not be read')

    # Remove the temporary page images created for OCR.
    for image_name in os.listdir(unread_path):
        if image_name.endswith(".jpg"):
            os.remove(os.path.join(unread_path, image_name))

    template_path = './data/Templates/'
    template_save_path = './data/Templates_txt/'
    for file_name in os.listdir(template_path):
        source = template_path + file_name
        text = parser.from_file(source)['content']
        if not text:
            # parser returned None or an empty string
            continue
        text = cleanUp(text)
        txt_path = template_save_path + _text_stem(file_name) + '.txt'
        with open(txt_path, 'w') as text_file:
            text_file.write(text)
        # copying to read folder
        shutil.copyfile(source, read_path + '/' + file_name)