Beispiel #1
0
def getLine(g4):
    """Order the paragraph tags of one page for linear reading.

    Args:
        g4: iterable of tag objects (e.g. a BeautifulSoup ResultSet of ``<p>``)
            whose ``style`` attribute carries ``top`` and ``left`` offsets
            ending in a two-character unit (presumably ``px`` -- confirm).

    Returns:
        A list of tags.  On a page where ``chartDetect`` is falsy this is the
        ``maxSort`` result for the body (or the concatenation of two sorted
        halves when exactly two left offsets are reported by ``subFix``).  On
        a page where ``chartDetect`` is truthy it is the sorted non-chart half
        of the body, or ``[]`` when no unambiguous split is found.

    Raises:
        IndexError: if ``g4`` yields no tags (no top offsets to take
            bounds from).
        KeyError: if a tag has no ``style`` attribute or no ``top`` entry.
    """

    def top_px(tag):
        # "a:b;c:d;" -> [["a", "b"], ["c", "d"]]; empty chunks are skipped.
        declarations = [
            chunk.split(":")
            for chunk in tag.attrs["style"].split(";")
            if len(chunk) > 0
        ]
        # Strip the two-character unit suffix before converting.
        return int(styleMap(declarations)["top"][:-2])

    def reading_order(reference):
        # Sort key: 1000 * programApprox(top, reference) dominates, the left
        # offset breaks ties.
        return (lambda x: 1000 * programApprox(getStyleCode(x, "top"),
                                               reference) +
                getStyleCode(x, "left"))

    has_chart = chartDetect(g4)

    # Shared prefix of both page kinds: keep only the first group returned by
    # groupMe for the range [smallest top + 1, largest top - 1].
    # NOTE(review): presumably this drops the header and footer rows -- the
    # semantics of inNewRange/groupMe are not visible here.
    paragraphs = list(g4)
    tops = sorted({top_px(tag) for tag in g4})
    bounds = [tops[0] + 1, tops[-1] - 1]
    body = groupMe(
        paragraphs,
        (lambda x: inNewRange(getStyleCode(x, "top"), bounds)))[0]

    if not has_chart:
        # Only the text-page path needs the body-wide reference.
        reference = cleanUp(body)

    # Distinct left offsets taken from the second element of getBold's result.
    bold = getBold(body)
    columns = subFix(sorted({int(x[0]["left"][:-2]) for x in bold[1]}))

    if not has_chart:
        if len(columns) != 2:
            # Single list: sort the whole body at once.
            return maxSort(body, reading_order(reference))
        # Two offsets: split just left of the second one and sort each half
        # against its own reference.
        split_at = columns[1] - 2
        halves = groupMe(body,
                         (lambda x: getStyleCode(x, "left") < split_at))
        first_ref = cleanUp(halves[0])
        second_ref = cleanUp(halves[1])
        first_sorted = maxSort(halves[0], reading_order(first_ref))
        second_sorted = maxSort(halves[1], reading_order(second_ref))
        return first_sorted + second_sorted

    if len(columns) != 1:
        return []
    split_at = columns[0] - 2
    halves = maxSort(
        groupMe(body, (lambda x: getStyleCode(x, "left") < split_at)),
        (lambda x: 1000 * getStyleCode(x, "top") + getStyleCode(x, "left")))
    # Return the half that is not a chart, provided the other half is one.
    for chart_idx, text_idx in ((0, 1), (1, 0)):
        if chartDetect(halves[chart_idx]) and (
                not chartDetect(halves[text_idx])):
            text_part = list(halves[text_idx])
            text_ref = cleanUp(text_part)
            return maxSort(text_part, reading_order(text_ref))
    return []
Beispiel #2
0
def getLine(g4):
    """Order the paragraph tags of one page for linear reading.

    Args:
        g4: iterable of tag objects (e.g. a BeautifulSoup ResultSet of ``<p>``)
            whose ``style`` attribute carries ``top`` and ``left`` offsets
            ending in a two-character unit (presumably ``px`` -- confirm).

    Returns:
        A list of tags.  On a page where ``chartDetect`` is falsy: the sorted
        body, or the two sorted halves concatenated when ``subFix`` reports
        exactly two left offsets.  On a page where ``chartDetect`` is truthy:
        the sorted non-chart half of the body, or ``[]`` when the body cannot
        be split unambiguously into a chart half and a text half.

    Raises:
        IndexError: if ``g4`` yields no tags.
        KeyError: if a tag has no ``style`` attribute or no ``top`` entry.
    """

    def top_px(tag):
        # "a:b;c:d;" -> [["a", "b"], ["c", "d"]]; empty chunks are skipped.
        declarations = [
            chunk.split(":")
            for chunk in tag.attrs["style"].split(";")
            if len(chunk) > 0
        ]
        # Strip the two-character unit suffix before converting.
        return int(styleMap(declarations)["top"][:-2])

    def row_key(weight, reference):
        # weight * programApprox(top, reference) dominates; the left offset
        # breaks ties.
        return (lambda x: weight * programApprox(getStyleCode(x, "top"),
                                                 reference) +
                getStyleCode(x, "left"))

    def bold_lefts(tags):
        # Distinct left offsets from the second element of getBold's result.
        return sorted({int(x[0]["left"][:-2]) for x in getBold(tags)[1]})

    has_chart = chartDetect(g4)

    # Materialise once and parse from the copy, so a one-shot iterable is not
    # walked twice.
    paragraphs = list(g4)
    tops = sorted({top_px(tag) for tag in paragraphs})
    # NOTE(review): presumably this range drops the header and footer rows --
    # the semantics of inNewRange/groupMe are not visible here.
    bounds = [tops[0] + 1, tops[-1] - 1]
    body = groupMe(
        paragraphs,
        (lambda x: inNewRange(getStyleCode(x, "top"), bounds)))[0]

    if not has_chart:
        reference = cleanUp(body)
        columns = subFix(bold_lefts(body))
        if len(columns) != 2:
            return maxSort(body, row_key(250, reference))
        # Two offsets: split just left of the second one; the weight is
        # derived from the distance between the two offsets.
        split_at = columns[1] - 2
        weight = columns[1] - columns[0] + 10
        halves = maxSort(
            groupMe(body, (lambda x: getStyleCode(x, "left") < split_at)),
            row_key(weight, reference))
        return halves[0] + halves[1]

    # Chart page: this path does not pass the offsets through subFix.
    columns = bold_lefts(body)
    if len(columns) != 1:
        return []
    split_at = columns[0] - 1
    weight = 250
    halves = maxSort(
        groupMe(body, (lambda x: getStyleCode(x, "left") < split_at)),
        (lambda x: weight * getStyleCode(x, "top") + getStyleCode(x, "left")))
    # Return the half that is not a chart, provided the other half is one.
    for chart_idx, text_idx in ((0, 1), (1, 0)):
        if chartDetect(halves[chart_idx]) and (
                not chartDetect(halves[text_idx])):
            text_part = halves[text_idx]
            text_ref = cleanUp(text_part)
            # Fix: the key used to call programApprox(x, mc) with the tag
            # itself; every other call site passes the tag's "top" offset and
            # adds the "left" offset as tie-breaker.
            return maxSort(text_part, row_key(weight, text_ref))
    return []
Beispiel #3
0
def _report_format_error(items, label):
    """Print a diagnostic dump of ``items`` followed by the error ``label``.

    ``items`` is printed as HEAD/TAIL pairs (elements ``2*i`` and ``2*i + 1``)
    when it holds at least two elements; otherwise it is dumped whole.
    """
    pair_count = len(items) // 2
    if pair_count != 0:
        for idx in range(pair_count):
            print("HEAD >>>>>>>>>>>>")
            print(items[idx * 2])
            print("TAIL >>>>>>>>>>>>")
            print(items[idx * 2 + 1])
    else:
        print("DIRECT DUMP >>>>>>>>>>>>")
        print(items)
        print("NO FULL LENGTH")
    print(label)


def _collect_details(name_rest, group_rest):
    """Run ``processMe`` over the non-empty inputs and drop empty results.

    Returns ``None`` when both inputs equal ``[]``; otherwise a list of the
    non-``[]`` items, with ``name_rest`` results first.
    """
    if name_rest != [] and group_rest != []:
        raw = processMe(name_rest) + processMe(group_rest)
    elif group_rest != []:
        raw = processMe(group_rest)
    elif name_rest != []:
        raw = processMe(name_rest)
    else:
        return None
    return [item for item in raw if item != []]


def _parse_entry(head, body):
    """Build one ``[index_span, name, details]`` record from a head/body pair.

    On a parsing failure the pair and an error label are printed and the
    partially filled record is returned (unfilled slots stay ``[]``).
    """
    # NOTE(review): the result of cleanUp is unused here; the call is kept in
    # case cleanUp validates or mutates ``body`` -- confirm and drop if pure.
    cleanUp(body)
    spans = list(head('span'))
    entry = [spans[0], [], []]
    # The label only records whether the head had exactly one span.
    error_label = "FORMAT ERROR III" if len(spans) == 1 else "FORMAT ERROR IV"
    try:
        kept = loveIsGone(body, (lambda x: len(x.text) > 1))
        regrouped = remedyGroup(kept, (lambda x: len(x('span')) > 1))
        # The name is taken from the first group.
        name_info = getName(regrouped[0])
        entry[1] = name_info[0]
        entry[2] = _collect_details(name_info[1], regrouped[1])
    except Exception:
        print("GOLDMINE >>>>>>>>>>>>>")
        print(head, body)
        print(error_label)
    return entry


def getSucked(ask):
    """Parse the document identified by ``ask`` into a nested list.

    Args:
        ask: identifier passed to ``getCode`` and ``getElimination``.

    Returns:
        ``[header, prelude, categories]``.  ``header`` comes from
        ``getElimination`` or, when that fails, is ``[text, range_line]``
        rebuilt from the first ``<div>``.  ``prelude`` is the first element of
        the top-level ``preludeExtract`` result (``[]`` for an empty file).
        Each category is ``[title, prelude, entries]`` and each entry is
        ``[index_span, name, details]``.

    Format problems are reported on stdout ("FORMAT ERROR I".."IV") instead
    of being raised; an empty line stream prints "EMPTY FILE".
    """
    markup = getCode(ask)
    soup = BeautifulSoup(markup, features="lxml")
    try:
        header = getElimination(ask)
        print(header, "HAS PROFILE")
        # Result unused; a failure here routes to the fallback below.
        getRange(header[1])
    except Exception:
        # Fallback: rebuild the header from the first <div>.
        first_page = list(copy.copy(soup)("div")[0]("p"))

        def top_px(tag):
            declarations = [
                chunk.split(":")
                for chunk in tag.attrs["style"].split(";")
                if len(chunk) > 0
            ]
            return int(styleMap(declarations)["top"][:-2])

        # Text of the first paragraph having the smallest "top" offset.
        smallest_top = sorted(map(top_px, first_page))[0]
        top_text = [p for p in first_page
                    if top_px(p) == smallest_top][0].text
        # First of the leading three paragraphs whose text mentions "Range".
        range_line = [text for text in (p.text for p in first_page[:3])
                      if "Range" in text][0]
        header = [top_text, range_line]
        print(header, "WITHOUT PROFILE")
        getRange(header[1])

    # Every <div> after the first is sorted into reading order by getLine.
    lines = []
    for page in soup("div")[1:]:
        lines += getLine(page("p"))
    stream = WP(WN(lines))

    ks = [header, [], []]
    if stream != []:
        extracted = preludeExtract(
            maxSplit(splitMe(stream, checkBold), checkAnother))
        ks[1] = extracted[0]
        categories = extracted[1]
        if checkFormat(categories):
            # Elements alternate: category title, category content.
            for mx in range(len(categories) // 2):
                parts = preludeExtract(categories[2 * mx + 1])
                content = parts[1]
                category = [categories[2 * mx], parts[0], []]
                if checkFormat(content):
                    # Content alternates: entry head, entry body.
                    for mv in range(len(content) // 2):
                        category[2].append(
                            _parse_entry(content[2 * mv],
                                         content[2 * mv + 1]))
                else:
                    _report_format_error(content, "FORMAT ERROR II")
                ks[2].append(category)
        else:
            _report_format_error(categories, "FORMAT ERROR I")
    else:
        # Fix: this message used to sit after the return and never printed.
        print("EMPTY FILE")
    return ks
Beispiel #4
0
# Fragment of a top-level driver script; the excerpt stops partway through the
# innermost branch.  `fn` must already be defined before this point (its
# definition is not visible here).
# Split `fn` with checkBold / checkAnother; the result is walked below as
# alternating elements (even index, odd index).
fx = maxSplit(splitMe(fn, checkBold), checkAnother)
if checkFormat(fx):
    for mx in range(len(fx) // 2):
        # Even element: printed as-is (one category, per the original note);
        # odd element: handed to preludeExtract.
        print(fx[mx * 2])
        rk = preludeExtract(fx[2 * mx + 1])  # rk[0] is the prelude.
        rk0 = rk[1]  # remaining content, walked in pairs below.
        if checkFormat(rk0):
            for mv in range(len(rk0) // 2):
                # Inspect each (even, odd) pair of the category content.
                rv = rk0[
                    mv *
                    2]  # even element of the pair; its <span> children are read below.
                rkm = rk0[1 + mv * 2]  # odd element of the pair: the part being tested.
                rf = cleanUp(rkm)  # used as the second argument of programApprox below.
                # print(rf)
                rg = list(rv('span'))
                rs = rg[0]  # first span; the index, per the original note.
                # Sort by 250 * programApprox(top, rf) + left.
                rk1 = sortMe(rkm, (lambda x: 250 * programApprox(
                    getStyleCode(x, "top"), rf) + getStyleCode(x, "left")))
                if len(rg) == 1:
                    # Only handled when the first sorted element's text is a
                    # single character.
                    if len(rk1[0].text) == 1:
                        g0 = loveIsGone(rk1, (lambda x: len(x.text) > 1))
                        g1 = remedyGroup(g0, (lambda x: len(x('span')) > 1))
                        # the name is in the first group.
                        # print(g1)
                        g2 = getName(g1[0])
                        # print(g1[1])
                        g4 = g2[0]  # name of char
Beispiel #5
0
def _report_format_error(items, label):
    """Print a diagnostic dump of ``items`` followed by the error ``label``.

    ``items`` is printed as HEAD/TAIL pairs (elements ``2*i`` and ``2*i + 1``)
    when it holds at least two elements; otherwise it is dumped whole.
    """
    pair_count = len(items) // 2
    if pair_count != 0:
        for idx in range(pair_count):
            print("HEAD >>>>>>>>>>>>")
            print(items[idx * 2])
            print("TAIL >>>>>>>>>>>>")
            print(items[idx * 2 + 1])
    else:
        print("DIRECT DUMP >>>>>>>>>>>>")
        print(items)
        print("NO FULL LENGTH")
    print(label)


def _collect_details(name_rest, group_rest):
    """Run ``processMe`` over the non-empty inputs and drop empty results.

    Returns ``None`` when both inputs equal ``[]``; otherwise a list of the
    non-``[]`` items, with ``name_rest`` results first.
    """
    if name_rest != [] and group_rest != []:
        raw = processMe(name_rest) + processMe(group_rest)
    elif group_rest != []:
        raw = processMe(group_rest)
    elif name_rest != []:
        raw = processMe(name_rest)
    else:
        return None
    return [item for item in raw if item != []]


def _check_entry(head, body):
    """Dry-run the parsing of one head/body pair and report failures.

    Nothing is returned; when any parsing step raises, the pair is printed
    together with "FORMAT ERROR III" (head has exactly one span) or
    "FORMAT ERROR IV" (otherwise).
    """
    # NOTE(review): the result of cleanUp is unused here; the call is kept in
    # case cleanUp validates or mutates ``body`` -- confirm and drop if pure.
    cleanUp(body)
    spans = list(head('span'))
    # Kept from the original: a head without any span raises IndexError here,
    # outside the guarded section.
    _index_span = spans[0]
    error_label = "FORMAT ERROR III" if len(spans) == 1 else "FORMAT ERROR IV"
    try:
        kept = loveIsGone(body, (lambda x: len(x.text) > 1))
        regrouped = remedyGroup(kept, (lambda x: len(x('span')) > 1))
        # The name is taken from the first group.
        name_info = getName(regrouped[0])
        # Values are discarded; the lookups stay so a malformed entry is
        # still detected and reported.
        _name = name_info[0]
        _collect_details(name_info[1], regrouped[1])
    except Exception:
        print(head, body)
        print(error_label)


def getSucked(ask):
    """Check that the document identified by ``ask`` parses, printing problems.

    Args:
        ask: identifier passed to ``getCode`` and ``getElimination``.

    Returns:
        None.  All output goes to stdout: the header line, every category
        title, and "FORMAT ERROR I".."IV" / "EMPTY FILE" diagnostics.
    """
    markup = getCode(ask)
    soup = BeautifulSoup(markup, features="lxml")
    try:
        header = getElimination(ask)
        print(header, "HAS PROFILE")
        # Result unused; a failure here routes to the fallback below.
        getRange(header[1])
    except Exception:
        # Fallback: first of the leading three paragraphs of the first <div>
        # whose text mentions "Range".
        leading = list(soup("div")[0]("p")[:3])
        header = [text for text in (p.text for p in leading)
                  if "Range" in text][0]
        print(header, "WITHOUT PROFILE")
        getRange(header)

    # Every <div> after the first is sorted into reading order by getLine.
    lines = []
    for page in soup("div")[1:]:
        lines += getLine(page("p"))
    stream = WP(WN(lines))

    if stream != []:
        extracted = preludeExtract(
            maxSplit(splitMe(stream, checkBold), checkAnother))
        categories = extracted[1]
        if checkFormat(categories):
            # Elements alternate: category title, category content.
            for mx in range(len(categories) // 2):
                print(categories[mx * 2])
                content = preludeExtract(categories[2 * mx + 1])[1]
                if checkFormat(content):
                    # Content alternates: entry head, entry body.
                    for mv in range(len(content) // 2):
                        _check_entry(content[2 * mv], content[2 * mv + 1])
                else:
                    _report_format_error(content, "FORMAT ERROR II")
        else:
            _report_format_error(categories, "FORMAT ERROR I")
    else:
        print("EMPTY FILE")