def extendedProductExtraction(keyword = 'iphone 7', onlyFollowings = False, AllPageButId = False):
    """Save the products referenced by the already-stored labeled pairs of a keyword.

    Reads the labeled pairs back through ``Trainer.readLabeledPairs``, collects
    the distinct ids found in the first element of every pair record, fetches
    the matching products from ``paths.newProductVectorFolder3`` and stores them
    with ``Trainer.saveSpecificProduct``.

    Args:
        keyword: Search keyword whose labeled pairs are processed.
        onlyFollowings: Forwarded to ``getLabeledPairsAndProductsPath`` to pick
            the pairs/products locations.
        AllPageButId: Forwarded to ``getLabeledPairsAndProductsPath`` to pick
            the pairs/products locations.

    Returns:
        None. The result is written by ``Trainer.saveSpecificProduct``.
    """
    # The import list is kept unchanged: some of these project modules may rely
    # on import-time side effects, so none is dropped here.
    import paths, SparkLogFileHandler, SearchExtractor, FinalizedRunners, NewProductPreferrer, PythonVersionHandler, Trainer
    outputFolder = paths.joinPath(paths.HDFSRootFolder, 'weekAugust')
    # Fix: outputPath / productsPath were used without ever being defined,
    # which raised NameError. Resolve them the same way extendedPairs does so
    # both functions agree on where the pairs and products live.
    outputPath, productsPath = getLabeledPairsAndProductsPath(outputFolder, keyword, onlyFollowings = onlyFollowings, AllPageButId = AllPageButId)
    pairs = Trainer.readLabeledPairs(outputPath)
    # Element 0 of every pair record is flattened into individual ids.
    ids = pairs.flatMap(lambda i: i[0]).distinct()
    PythonVersionHandler.print_logging(ids.count(), 'ids have been gathered from the labeled pairs by', PythonVersionHandler.nowStr())
    productVectorFolder = paths.newProductVectorFolder3
    products = Trainer.getProducts(ids, productVectorFolder)
    Trainer.saveSpecificProduct(products, productsPath)
def extendedPairs(keyword = 'iphone 7', onlyFollowings = False, AllPageButId = False):
    """Build, store and resolve the labeled training pairs of one keyword.

    Loads the extracted logs of ``keyword`` from HDFS, derives training pairs
    from them, saves the pairs, and finally saves the products whose ids occur
    in the first element of the pair records. Returns early, writing nothing,
    when no pair is produced.

    Args:
        keyword: Search keyword to process; spaces become underscores in the
            input path.
        onlyFollowings: Forwarded to the pair generator and to the path helper.
        AllPageButId: Forwarded to the pair generator and to the path helper.

    Returns:
        None.
    """
    # Same modules, same import order as before, one per statement.
    import paths
    import SparkLogFileHandler
    import SearchExtractor
    import FinalizedRunners
    import NewProductPreferrer
    import PythonVersionHandler
    import Trainer

    # Locate and load the extracted logs for this keyword.
    keywordSlug = keyword.replace(' ', '_')
    weekFolder = paths.joinPath(paths.HDFSRootFolder, 'weekAugust')
    extractedLogsPath = paths.joinPath(weekFolder, keywordSlug + '/' + keywordSlug + '_extractedLogs')
    preparedLogs = FinalizedRunners.getPreparedLogsFromHDFS(extractedLogsPath, filtering = False)

    # Turn the logs into labeled pairs.
    keywordLogs = SearchExtractor.searchNProductLogsForSingleKeyword(preparedLogs, keyword)
    labeledPairs = NewProductPreferrer.trainingInstancesForSingleKeyword(
        keywordLogs,
        onlyFollowings = onlyFollowings,
        AllPageButId = AllPageButId,
    )
    if labeledPairs.isEmpty():
        # Nothing to persist for this keyword.
        return

    # Persist the pairs.
    labeledPairs = labeledPairs.coalesce(24)
    pairsPath, productsPath = getLabeledPairsAndProductsPath(
        weekFolder,
        keyword,
        onlyFollowings = onlyFollowings,
        AllPageButId = AllPageButId,
    )
    SparkLogFileHandler.saveRDDToHDFS(labeledPairs, pairsPath)

    # Gather the distinct ids from element 0 of every pair and save their products.
    productIds = labeledPairs.flatMap(lambda record: record[0]).distinct()
    idCount = productIds.count()
    PythonVersionHandler.print_logging(idCount, 'ids have been gathered from the labeled pairs by', PythonVersionHandler.nowStr())
    matchedProducts = Trainer.getProducts(productIds, paths.newProductVectorFolder3)
    Trainer.saveSpecificProduct(matchedProducts, productsPath)