Esempio n. 1
0
# Smoke-test script: store one CV record through store.GFS, then run a search.
import sys

# Put the current working directory on the module search path so the
# project-local `store` module can be imported when run from the project root.
sys.path.append(r'./') 
from store import GFS
gfs = GFS()
# NOTE(review): the arguments look like (name, email, phone, CV file path,
# extracted text) -- confirm against the signature of GFS.store_2_db.
gfs.store_2_db("test","*****@*****.**","12321451234","/root/Downloads/Test_CVs/HenryChai.doc","filetext")
# NOTE(review): the meaning of the two search_cv arguments is not visible
# here -- confirm against store.GFS.search_cv.
gfs.search_cv("file","text1")
Esempio n. 2
0
# Smoke-test script: store one CV record through store.GFS, then run a search.
import sys

# Put the current working directory on the module search path so the
# project-local `store` module can be imported when run from the project root.
sys.path.append(r'./')
from store import GFS

gfs = GFS()
# NOTE(review): the arguments look like (name, email, phone, CV file path,
# extracted text) -- confirm against the signature of GFS.store_2_db.
gfs.store_2_db("test", "*****@*****.**", "12321451234",
               "/root/Downloads/Test_CVs/HenryChai.doc", "filetext")
# NOTE(review): the meaning of the two search_cv arguments is not visible
# here -- confirm against store.GFS.search_cv.
gfs.search_cv("file", "text1")
Esempio n. 3
0
    def extract_control(self):
        """Extract the text of ``self.filename`` and store the parsed CV.

        The extractor is selected from ``self.suffix``; supported values are
        "pdf", "docx", "txt" and "doc". Any other suffix only emits a warning
        through ``cprint`` and nothing is extracted or stored.

        Returns:
            None. Progress and problems are reported through ``cprint``; a
            successful extraction is stored through ``GFS.store_2_db``.
        """
        # Built before the suffix check on purpose: the store object has
        # always been created even for unsupported files, so keep that order.
        gfs = GFS()

        empty_hint = ",make sure the file is not empty and no space/special characters in file name as well."
        pdf_hint = (
            ",make sure the pdf file is not produced by scanned images"
            " and no space/special characters in file name as well."
        )

        # plan = (kind, extractor class, extraction method name, echo text?, warning hint)
        if self.suffix == "pdf":
            plan = ("pdf", ExtractPdf, "extract_text_from_pdf1", True, pdf_hint)
        elif self.suffix == "docx":
            plan = ("docx", ExtractDocx, "extract_text_from_docx1", True, empty_hint)
        elif self.suffix == "txt":
            # The txt path never echoed the extracted text; keep it quiet.
            plan = ("txt", ExtractTxt, "extract_text_from_txt", False, empty_hint)
        elif self.suffix == "doc":
            plan = ("doc", ExtractDoc, "extract_text_from_doc", True, empty_hint)
        else:
            cprint(
                "WARNING", "Warning:Input a wrong formated file,currently,this tool only accept .pdf/.docx/.doc/.txt"
            )
            return

        self._extract_and_store(gfs, *plan)

    def _extract_and_store(self, gfs, kind, extractor_cls, method_name, echo_text, hint):
        """Run one extractor over ``self.filename`` and store the result on success.

        Args:
            gfs: Store object; ``store_2_db`` is called on it with the three
                extracted fields, the file name and the extracted text.
            kind: Suffix label used in the "Enter extract ..." banner.
            extractor_cls: Extractor class, instantiated with ``self.filename``.
            method_name: Name of the extractor method that performs the
                extraction. It must return a sequence whose first item is a
                status code and whose second item is a message.
            echo_text: When true, print ``extractor.get_all_text()`` on success.
            hint: Text appended to the extractor message when status is -1.

        Status handling: 0 is success, -1 is reported as a warning, and any
        other value is reported as a failure.
        """
        cprint("HEADER", "Enter extract %s..." % kind)
        extractor = extractor_cls(self.filename)
        return_list = getattr(extractor, method_name)()
        status = return_list[0]

        if status == 0:
            if echo_text:
                # Parenthesised single-argument form prints the same on
                # Python 2 and is valid syntax on Python 3.
                print(extractor.get_all_text())
            cprint("OK", return_list[1])
            # extract name, email, phone number
            info_list = self.extract_info(extractor.text)
            # Only store the CV when all three fields were found.
            if info_list[0] and info_list[1] and info_list[2]:
                gfs.store_2_db(info_list[0], info_list[1], info_list[2], self.filename, extractor.text)
        elif status == -1:
            cprint("WARNING", return_list[1] + hint)
        else:
            cprint("FAIL", return_list[1])